LCOV - code coverage report
Current view: top level - fs/btrfs - extent-tree.c (source / functions) Hit Total Coverage
Test: btrfstest.info Lines: 2545 3597 70.8 %
Date: 2014-11-28 Functions: 153 183 83.6 %

          Line data    Source code
       1             : /*
       2             :  * Copyright (C) 2007 Oracle.  All rights reserved.
       3             :  *
       4             :  * This program is free software; you can redistribute it and/or
       5             :  * modify it under the terms of the GNU General Public
       6             :  * License v2 as published by the Free Software Foundation.
       7             :  *
       8             :  * This program is distributed in the hope that it will be useful,
       9             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      10             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      11             :  * General Public License for more details.
      12             :  *
      13             :  * You should have received a copy of the GNU General Public
      14             :  * License along with this program; if not, write to the
      15             :  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
      16             :  * Boston, MA 02111-1307, USA.
      17             :  */
      18             : #include <linux/sched.h>
      19             : #include <linux/pagemap.h>
      20             : #include <linux/writeback.h>
      21             : #include <linux/blkdev.h>
      22             : #include <linux/sort.h>
      23             : #include <linux/rcupdate.h>
      24             : #include <linux/kthread.h>
      25             : #include <linux/slab.h>
      26             : #include <linux/ratelimit.h>
      27             : #include <linux/percpu_counter.h>
      28             : #include "hash.h"
      29             : #include "tree-log.h"
      30             : #include "disk-io.h"
      31             : #include "print-tree.h"
      32             : #include "volumes.h"
      33             : #include "raid56.h"
      34             : #include "locking.h"
      35             : #include "free-space-cache.h"
      36             : #include "math.h"
      37             : #include "sysfs.h"
      38             : #include "qgroup.h"
      39             : 
      40             : #undef SCRAMBLE_DELAYED_REFS
      41             : 
      42             : /*
      43             :  * Control flags for do_chunk_alloc's force field.
      44             :  * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
      45             :  * if we really need one.
      46             :  *
      47             :  * CHUNK_ALLOC_LIMITED means to only try to allocate one
      48             :  * if we have very few chunks already allocated.  This is
      49             :  * used as part of the clustering code to help make sure
      50             :  * we have a good pool of storage to cluster in, without
      51             :  * filling the FS with empty chunks.
      52             :  *
      53             :  * CHUNK_ALLOC_FORCE means it must try to allocate one.
      54             :  *
      55             :  */
      56             : enum {
      57             :         CHUNK_ALLOC_NO_FORCE = 0,
      58             :         CHUNK_ALLOC_LIMITED = 1,
      59             :         CHUNK_ALLOC_FORCE = 2,
      60             : };
      61             : 
      62             : /*
      63             :  * Control how reservations are dealt with.
      64             :  *
      65             :  * RESERVE_FREE - freeing a reservation.
      66             :  * RESERVE_ALLOC - allocating space and we need to update bytes_may_use for
      67             :  *   ENOSPC accounting
      68             :  * RESERVE_ALLOC_NO_ACCOUNT - allocating space and we should not update
      69             :  *   bytes_may_use as the ENOSPC accounting is done elsewhere
      70             :  */
      71             : enum {
      72             :         RESERVE_FREE = 0,
      73             :         RESERVE_ALLOC = 1,
      74             :         RESERVE_ALLOC_NO_ACCOUNT = 2,
      75             : };
      76             : 
      77             : static int update_block_group(struct btrfs_root *root,
      78             :                               u64 bytenr, u64 num_bytes, int alloc);
      79             : static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
      80             :                                 struct btrfs_root *root,
      81             :                                 u64 bytenr, u64 num_bytes, u64 parent,
      82             :                                 u64 root_objectid, u64 owner_objectid,
      83             :                                 u64 owner_offset, int refs_to_drop,
      84             :                                 struct btrfs_delayed_extent_op *extra_op,
      85             :                                 int no_quota);
      86             : static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
      87             :                                     struct extent_buffer *leaf,
      88             :                                     struct btrfs_extent_item *ei);
      89             : static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
      90             :                                       struct btrfs_root *root,
      91             :                                       u64 parent, u64 root_objectid,
      92             :                                       u64 flags, u64 owner, u64 offset,
      93             :                                       struct btrfs_key *ins, int ref_mod);
      94             : static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
      95             :                                      struct btrfs_root *root,
      96             :                                      u64 parent, u64 root_objectid,
      97             :                                      u64 flags, struct btrfs_disk_key *key,
      98             :                                      int level, struct btrfs_key *ins,
      99             :                                      int no_quota);
     100             : static int do_chunk_alloc(struct btrfs_trans_handle *trans,
     101             :                           struct btrfs_root *extent_root, u64 flags,
     102             :                           int force);
     103             : static int find_next_key(struct btrfs_path *path, int level,
     104             :                          struct btrfs_key *key);
     105             : static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
     106             :                             int dump_block_groups);
     107             : static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
     108             :                                        u64 num_bytes, int reserve,
     109             :                                        int delalloc);
     110             : static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
     111             :                                u64 num_bytes);
     112             : int btrfs_pin_extent(struct btrfs_root *root,
     113             :                      u64 bytenr, u64 num_bytes, int reserved);
     114             : 
     115             : static noinline int
     116      155859 : block_group_cache_done(struct btrfs_block_group_cache *cache)
     117             : {
     118      155859 :         smp_mb();
     119      155864 :         return cache->cached == BTRFS_CACHE_FINISHED ||
     120             :                 cache->cached == BTRFS_CACHE_ERROR;
     121             : }
     122             : 
     123             : static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
     124             : {
     125      209825 :         return (cache->flags & bits) == bits;
     126             : }
     127             : 
     128             : static void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
     129             : {
     130      793807 :         atomic_inc(&cache->count);
     131             : }
     132             : 
     133      795183 : void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
     134             : {
     135     1590449 :         if (atomic_dec_and_test(&cache->count)) {
     136        1228 :                 WARN_ON(cache->pinned > 0);
     137        1228 :                 WARN_ON(cache->reserved > 0);
     138        1228 :                 kfree(cache->free_space_ctl);
     139        1228 :                 kfree(cache);
     140             :         }
     141      795266 : }
     142             : 
     143             : /*
     144             :  * this adds the block group to the fs_info rb tree for the block group
     145             :  * cache
     146             :  */
     147        1228 : static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
     148             :                                 struct btrfs_block_group_cache *block_group)
     149             : {
     150             :         struct rb_node **p;
     151             :         struct rb_node *parent = NULL;
     152             :         struct btrfs_block_group_cache *cache;
     153             : 
     154             :         spin_lock(&info->block_group_cache_lock);
     155        1228 :         p = &info->block_group_cache_tree.rb_node;
     156             : 
     157        4562 :         while (*p) {
     158             :                 parent = *p;
     159             :                 cache = rb_entry(parent, struct btrfs_block_group_cache,
     160             :                                  cache_node);
     161        2106 :                 if (block_group->key.objectid < cache->key.objectid) {
     162           0 :                         p = &(*p)->rb_left;
     163        2106 :                 } else if (block_group->key.objectid > cache->key.objectid) {
     164        2106 :                         p = &(*p)->rb_right;
     165             :                 } else {
     166             :                         spin_unlock(&info->block_group_cache_lock);
     167           0 :                         return -EEXIST;
     168             :                 }
     169             :         }
     170             : 
     171        1228 :         rb_link_node(&block_group->cache_node, parent, p);
     172        1228 :         rb_insert_color(&block_group->cache_node,
     173             :                         &info->block_group_cache_tree);
     174             : 
     175        1228 :         if (info->first_logical_byte > block_group->key.objectid)
     176         221 :                 info->first_logical_byte = block_group->key.objectid;
     177             : 
     178             :         spin_unlock(&info->block_group_cache_lock);
     179             : 
     180        1228 :         return 0;
     181             : }
     182             : 
     183             : /*
     184             :  * This will return the first block group starting at or after bytenr if
     185             :  * contains is 0, else it will return the block group that contains bytenr.
     186             :  */
     187             : static struct btrfs_block_group_cache *
     188      479918 : block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
     189             :                               int contains)
     190             : {
     191             :         struct btrfs_block_group_cache *cache, *ret = NULL;
     192             :         struct rb_node *n;
     193             :         u64 end, start;
     194             : 
     195             :         spin_lock(&info->block_group_cache_lock);
     196      479965 :         n = info->block_group_cache_tree.rb_node;
     197             : 
     198     1936016 :         while (n) {
     199     1444262 :                 cache = rb_entry(n, struct btrfs_block_group_cache,
     200             :                                  cache_node);
     201     1444262 :                 end = cache->key.objectid + cache->key.offset - 1;
     202             :                 start = cache->key.objectid;
     203             : 
     204     1444262 :                 if (bytenr < start) {
     205      327861 :                         if (!contains && (!ret || start < ret->key.objectid))
     206             :                                 ret = cache;
     207      327861 :                         n = n->rb_left;
     208     1116401 :                 } else if (bytenr > start) {
     209     1000827 :                         if (contains && bytenr <= end) {
     210             :                                 ret = cache;
     211             :                                 break;
     212             :                         }
     213      648225 :                         n = n->rb_right;
     214             :                 } else {
     215             :                         ret = cache;
     216             :                         break;
     217             :                 }
     218             :         }
     219      479965 :         if (ret) {
     220             :                 btrfs_get_block_group(ret);
     221      474945 :                 if (bytenr == 0 && info->first_logical_byte > ret->key.objectid)
     222          22 :                         info->first_logical_byte = ret->key.objectid;
     223             :         }
     224             :         spin_unlock(&info->block_group_cache_lock);
     225             : 
     226      479964 :         return ret;
     227             : }
     228             : 
     229         689 : static int add_excluded_extent(struct btrfs_root *root,
     230             :                                u64 start, u64 num_bytes)
     231             : {
     232         689 :         u64 end = start + num_bytes - 1;
     233         689 :         set_extent_bits(&root->fs_info->freed_extents[0],
     234             :                         start, end, EXTENT_UPTODATE, GFP_NOFS);
     235         689 :         set_extent_bits(&root->fs_info->freed_extents[1],
     236             :                         start, end, EXTENT_UPTODATE, GFP_NOFS);
     237         689 :         return 0;
     238             : }
     239             : 
     240        1300 : static void free_excluded_extents(struct btrfs_root *root,
     241             :                                   struct btrfs_block_group_cache *cache)
     242             : {
     243             :         u64 start, end;
     244             : 
     245        1300 :         start = cache->key.objectid;
     246        1300 :         end = start + cache->key.offset - 1;
     247             : 
     248        1300 :         clear_extent_bits(&root->fs_info->freed_extents[0],
     249             :                           start, end, EXTENT_UPTODATE, GFP_NOFS);
     250        1300 :         clear_extent_bits(&root->fs_info->freed_extents[1],
     251             :                           start, end, EXTENT_UPTODATE, GFP_NOFS);
     252        1300 : }
     253             : 
     254        1228 : static int exclude_super_stripes(struct btrfs_root *root,
     255             :                                  struct btrfs_block_group_cache *cache)
     256             : {
     257             :         u64 bytenr;
     258             :         u64 *logical;
     259             :         int stripe_len;
     260             :         int i, nr, ret;
     261             : 
     262        1228 :         if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) {
     263         217 :                 stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid;
     264         217 :                 cache->bytes_super += stripe_len;
     265         217 :                 ret = add_excluded_extent(root, cache->key.objectid,
     266             :                                           stripe_len);
     267         217 :                 if (ret)
     268             :                         return ret;
     269             :         }
     270             : 
     271        3684 :         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
     272             :                 bytenr = btrfs_sb_offset(i);
     273        3684 :                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
     274             :                                        cache->key.objectid, bytenr,
     275             :                                        0, &logical, &nr, &stripe_len);
     276        3684 :                 if (ret)
     277             :                         return ret;
     278             : 
     279        4156 :                 while (nr--) {
     280             :                         u64 start, len;
     281             : 
     282         944 :                         if (logical[nr] > cache->key.objectid +
     283         472 :                             cache->key.offset)
     284           0 :                                 continue;
     285             : 
     286         472 :                         if (logical[nr] + stripe_len <= cache->key.objectid)
     287           0 :                                 continue;
     288             : 
     289             :                         start = logical[nr];
     290         472 :                         if (start < cache->key.objectid) {
     291             :                                 start = cache->key.objectid;
     292           0 :                                 len = (logical[nr] + stripe_len) - start;
     293             :                         } else {
     294         472 :                                 len = min_t(u64, stripe_len,
     295             :                                             cache->key.objectid +
     296             :                                             cache->key.offset - start);
     297             :                         }
     298             : 
     299         472 :                         cache->bytes_super += len;
     300         472 :                         ret = add_excluded_extent(root, start, len);
     301         472 :                         if (ret) {
     302           0 :                                 kfree(logical);
     303           0 :                                 return ret;
     304             :                         }
     305             :                 }
     306             : 
     307        3684 :                 kfree(logical);
     308             :         }
     309             :         return 0;
     310             : }
     311             : 
     312             : static struct btrfs_caching_control *
     313         128 : get_caching_control(struct btrfs_block_group_cache *cache)
     314             : {
     315             :         struct btrfs_caching_control *ctl;
     316             : 
     317             :         spin_lock(&cache->lock);
     318         128 :         if (cache->cached != BTRFS_CACHE_STARTED) {
     319             :                 spin_unlock(&cache->lock);
     320           0 :                 return NULL;
     321             :         }
     322             : 
     323             :         /* We're loading it the fast way, so we don't have a caching_ctl. */
     324         128 :         if (!cache->caching_ctl) {
     325             :                 spin_unlock(&cache->lock);
     326           0 :                 return NULL;
     327             :         }
     328             : 
     329             :         ctl = cache->caching_ctl;
     330         128 :         atomic_inc(&ctl->count);
     331             :         spin_unlock(&cache->lock);
     332         128 :         return ctl;
     333             : }
     334             : 
     335         677 : static void put_caching_control(struct btrfs_caching_control *ctl)
     336             : {
     337        1354 :         if (atomic_dec_and_test(&ctl->count))
     338         343 :                 kfree(ctl);
     339         677 : }
     340             : 
     341             : /*
     342             :  * This is only called by cache_block_group.  Since we could have freed
     343             :  * extents, we need to check pinned_extents for any extents that can't be used
     344             :  * yet; their free space will be released as soon as the transaction commits.
     345             :  */
     346        8152 : static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
     347             :                               struct btrfs_fs_info *info, u64 start, u64 end)
     348             : {
     349             :         u64 extent_start, extent_end, size, total_added = 0;
     350             :         int ret;
     351             : 
     352        7263 :         while (start < end) {
     353        1439 :                 ret = find_first_extent_bit(info->pinned_extents, start,
     354             :                                             &extent_start, &extent_end,
     355             :                                             EXTENT_DIRTY | EXTENT_UPTODATE,
     356             :                                             NULL);
     357        1439 :                 if (ret)
     358             :                         break;
     359             : 
     360         662 :                 if (extent_start <= start) {
     361         214 :                         start = extent_end + 1;
     362         448 :                 } else if (extent_start > start && extent_start < end) {
     363         122 :                         size = extent_start - start;
     364         122 :                         total_added += size;
     365             :                         ret = btrfs_add_free_space(block_group, start,
     366             :                                                    size);
     367         122 :                         BUG_ON(ret); /* -ENOMEM or logic error */
     368         122 :                         start = extent_end + 1;
     369             :                 } else {
     370             :                         break;
     371             :                 }
     372             :         }
     373             : 
     374        6927 :         if (start < end) {
     375        1103 :                 size = end - start;
     376        1103 :                 total_added += size;
     377             :                 ret = btrfs_add_free_space(block_group, start, size);
     378        1103 :                 BUG_ON(ret); /* -ENOMEM or logic error */
     379             :         }
     380             : 
     381        6927 :         return total_added;
     382             : }
     383             : 
     384         206 : static noinline void caching_thread(struct btrfs_work *work)
     385             : {
     386             :         struct btrfs_block_group_cache *block_group;
     387             :         struct btrfs_fs_info *fs_info;
     388             :         struct btrfs_caching_control *caching_ctl;
     389             :         struct btrfs_root *extent_root;
     390             :         struct btrfs_path *path;
     391         406 :         struct extent_buffer *leaf;
     392             :         struct btrfs_key key;
     393             :         u64 total_found = 0;
     394             :         u64 last = 0;
     395             :         u32 nritems;
     396             :         int ret = -ENOMEM;
     397             : 
     398         206 :         caching_ctl = container_of(work, struct btrfs_caching_control, work);
     399         206 :         block_group = caching_ctl->block_group;
     400         206 :         fs_info = block_group->fs_info;
     401         206 :         extent_root = fs_info->extent_root;
     402             : 
     403         206 :         path = btrfs_alloc_path();
     404         206 :         if (!path)
     405             :                 goto out;
     406             : 
     407         206 :         last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
     408             : 
     409             :         /*
     410             :          * We don't want to deadlock with somebody trying to allocate a new
     411             :          * extent for the extent root while also trying to search the extent
     412             :          * root to add free space.  So we skip locking and search the commit
     413             :          * root, since it's read-only.
     414             :          */
     415         206 :         path->skip_locking = 1;
     416         206 :         path->search_commit_root = 1;
     417         206 :         path->reada = 1;
     418             : 
     419         206 :         key.objectid = last;
     420         206 :         key.offset = 0;
     421         206 :         key.type = BTRFS_EXTENT_ITEM_KEY;
     422             : again:
     423         206 :         mutex_lock(&caching_ctl->mutex);
     424             :         /* need to make sure the commit_root doesn't disappear */
     425         206 :         down_read(&fs_info->commit_root_sem);
     426             : 
     427             : next:
     428         370 :         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
     429         370 :         if (ret < 0)
     430             :                 goto err;
     431             : 
     432         370 :         leaf = path->nodes[0];
     433             :         nritems = btrfs_header_nritems(leaf);
     434             : 
     435             :         while (1) {
     436        6546 :                 if (btrfs_fs_closing(fs_info) > 1) {
     437             :                         last = (u64)-1;
     438             :                         break;
     439             :                 }
     440             : 
     441        6546 :                 if (path->slots[0] < nritems) {
     442        6421 :                         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
     443             :                 } else {
     444         125 :                         ret = find_next_key(path, 0, &key);
     445         125 :                         if (ret)
     446             :                                 break;
     447             : 
     448          72 :                         if (need_resched() ||
     449             :                             rwsem_is_contended(&fs_info->commit_root_sem)) {
     450           0 :                                 caching_ctl->progress = last;
     451           0 :                                 btrfs_release_path(path);
     452           0 :                                 up_read(&fs_info->commit_root_sem);
     453           0 :                                 mutex_unlock(&caching_ctl->mutex);
     454           0 :                                 cond_resched();
     455           0 :                                 goto again;
     456             :                         }
     457             : 
     458          36 :                         ret = btrfs_next_leaf(extent_root, path);
     459          36 :                         if (ret < 0)
     460             :                                 goto err;
     461          36 :                         if (ret)
     462             :                                 break;
     463          36 :                         leaf = path->nodes[0];
     464             :                         nritems = btrfs_header_nritems(leaf);
     465          36 :                         continue;
     466             :                 }
     467             : 
     468        6421 :                 if (key.objectid < last) {
     469         164 :                         key.objectid = last;
     470         164 :                         key.offset = 0;
     471         164 :                         key.type = BTRFS_EXTENT_ITEM_KEY;
     472             : 
     473         164 :                         caching_ctl->progress = last;
     474         164 :                         btrfs_release_path(path);
     475         164 :                         goto next;
     476             :                 }
     477             : 
     478        6257 :                 if (key.objectid < block_group->key.objectid) {
     479           0 :                         path->slots[0]++;
     480           0 :                         continue;
     481             :                 }
     482             : 
     483        6257 :                 if (key.objectid >= block_group->key.objectid +
     484        6257 :                     block_group->key.offset)
     485             :                         break;
     486             : 
     487        6140 :                 if (key.type == BTRFS_EXTENT_ITEM_KEY ||
     488             :                     key.type == BTRFS_METADATA_ITEM_KEY) {
     489        6101 :                         total_found += add_new_free_space(block_group,
     490             :                                                           fs_info, last,
     491             :                                                           key.objectid);
     492        6101 :                         if (key.type == BTRFS_METADATA_ITEM_KEY)
     493           0 :                                 last = key.objectid +
     494           0 :                                         fs_info->tree_root->leafsize;
     495             :                         else
     496        6101 :                                 last = key.objectid + key.offset;
     497             : 
     498        6101 :                         if (total_found > (1024 * 1024 * 2)) {
     499             :                                 total_found = 0;
     500          15 :                                 wake_up(&caching_ctl->wait);
     501             :                         }
     502             :                 }
     503        6140 :                 path->slots[0]++;
     504             :         }
     505             :         ret = 0;
     506             : 
     507         206 :         total_found += add_new_free_space(block_group, fs_info, last,
     508         206 :                                           block_group->key.objectid +
     509         206 :                                           block_group->key.offset);
     510         206 :         caching_ctl->progress = (u64)-1;
     511             : 
     512             :         spin_lock(&block_group->lock);
     513         206 :         block_group->caching_ctl = NULL;
     514         206 :         block_group->cached = BTRFS_CACHE_FINISHED;
     515             :         spin_unlock(&block_group->lock);
     516             : 
     517             : err:
     518         206 :         btrfs_free_path(path);
     519         206 :         up_read(&fs_info->commit_root_sem);
     520             : 
     521         206 :         free_excluded_extents(extent_root, block_group);
     522             : 
     523         206 :         mutex_unlock(&caching_ctl->mutex);
     524             : out:
     525         206 :         if (ret) {
     526             :                 spin_lock(&block_group->lock);
     527           0 :                 block_group->caching_ctl = NULL;
     528           0 :                 block_group->cached = BTRFS_CACHE_ERROR;
     529             :                 spin_unlock(&block_group->lock);
     530             :         }
     531         206 :         wake_up(&caching_ctl->wait);
     532             : 
     533         206 :         put_caching_control(caching_ctl);
     534         206 :         btrfs_put_block_group(block_group);
     535         206 : }
     536             : 
                       /*
                        * Start caching the free space of a block group.
                        *
                        * A caching control is allocated and initialised before cache->lock is
                        * taken.  While the group is in the BTRFS_CACHE_FAST state we sleep on
                        * the current control's wait queue.  If the group is then in any state
                        * other than BTRFS_CACHE_NO, the freshly allocated control is freed and
                        * 0 is returned.  Otherwise the control is attached to the group, the
                        * on-disk space cache is tried via load_free_space_cache() when the
                        * BTRFS_MOUNT_SPACE_CACHE option is set, and, unless that load returned
                        * 1 or @load_cache_only is set, the caching work item is queued on
                        * fs_info->caching_workers.
                        *
                        * @cache:           block group to cache
                        * @load_cache_only: when non-zero, never queue the caching work; if the
                        *                   fast load did not return 1 the group goes back to
                        *                   BTRFS_CACHE_NO
                        *
                        * Returns -ENOMEM if the control cannot be allocated, 0 on every early
                        * exit, and otherwise the value left in ret after queueing the work
                        * (its initial 0, or whatever load_free_space_cache() returned).
                        */
     537         517 : static int cache_block_group(struct btrfs_block_group_cache *cache,
     538             :                              int load_cache_only)
     539             : {
     540        1034 :         DEFINE_WAIT(wait);
     541         517 :         struct btrfs_fs_info *fs_info = cache->fs_info;
     542             :         struct btrfs_caching_control *caching_ctl;
     543             :         int ret = 0;
     544             : 
                               /* Allocated up front, before cache->lock (a spinlock) is taken. */
     545         517 :         caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
     546         517 :         if (!caching_ctl)
     547             :                 return -ENOMEM;
     548             : 
     549         517 :         INIT_LIST_HEAD(&caching_ctl->list);
     550         517 :         mutex_init(&caching_ctl->mutex);
     551         517 :         init_waitqueue_head(&caching_ctl->wait);
     552         517 :         caching_ctl->block_group = cache;
     553         517 :         caching_ctl->progress = cache->key.objectid;
     554             :         atomic_set(&caching_ctl->count, 1);
     555         517 :         btrfs_init_work(&caching_ctl->work, btrfs_cache_helper,
     556             :                         caching_thread, NULL, NULL);
     557             : 
     558             :         spin_lock(&cache->lock);
     559             :         /*
     560             :          * This should be a rare occasion, but this could happen I think in the
     561             :          * case where one thread starts to load the space cache info, and then
     562             :          * some other thread starts a transaction commit which tries to do an
     563             :          * allocation while the other thread is still loading the space cache
     564             :          * info.  The previous loop should have kept us from choosing this block
     565             :          * group, but if we've moved to the state where we will wait on caching
     566             :          * block groups we need to first check if we're doing a fast load here,
     567             :          * so we can wait for it to finish, otherwise we could end up allocating
     568             :          * from a block group whose cache gets evicted for one reason or
     569             :          * another.
     570             :          */
     571         517 :         while (cache->cached == BTRFS_CACHE_FAST) {
     572             :                 struct btrfs_caching_control *ctl;
     573             : 
                                       /*
                                        * Take a reference on the other loader's control so it
                                        * stays valid while we sleep with cache->lock dropped.
                                        */
     574           0 :                 ctl = cache->caching_ctl;
     575           0 :                 atomic_inc(&ctl->count);
     576           0 :                 prepare_to_wait(&ctl->wait, &wait, TASK_UNINTERRUPTIBLE);
     577             :                 spin_unlock(&cache->lock);
     578             : 
     579           0 :                 schedule();
     580             : 
     581           0 :                 finish_wait(&ctl->wait, &wait);
     582           0 :                 put_caching_control(ctl);
     583             :                 spin_lock(&cache->lock);
     584             :         }
     585             : 
                               /* Caching already started, finished or failed: nothing to do. */
     586         517 :         if (cache->cached != BTRFS_CACHE_NO) {
     587             :                 spin_unlock(&cache->lock);
     588         174 :                 kfree(caching_ctl);
     589         174 :                 return 0;
     590             :         }
     591         343 :         WARN_ON(cache->caching_ctl);
     592         343 :         cache->caching_ctl = caching_ctl;
     593         343 :         cache->cached = BTRFS_CACHE_FAST;
     594             :         spin_unlock(&cache->lock);
     595             : 
     596         343 :         if (fs_info->mount_opt & BTRFS_MOUNT_SPACE_CACHE) {
     597         343 :                 ret = load_free_space_cache(fs_info, cache);
     598             : 
     599             :                 spin_lock(&cache->lock);
                                       /* ret == 1 is treated as "cache loaded": caching is finished. */
     600         343 :                 if (ret == 1) {
     601         135 :                         cache->caching_ctl = NULL;
     602         135 :                         cache->cached = BTRFS_CACHE_FINISHED;
     603         135 :                         cache->last_byte_to_unpin = (u64)-1;
     604             :                 } else {
     605         208 :                         if (load_cache_only) {
     606           2 :                                 cache->caching_ctl = NULL;
     607           2 :                                 cache->cached = BTRFS_CACHE_NO;
     608             :                         } else {
     609         206 :                                 cache->cached = BTRFS_CACHE_STARTED;
     610             :                         }
     611             :                 }
     612             :                 spin_unlock(&cache->lock);
                                       /* Wake anyone sleeping in the BTRFS_CACHE_FAST loop above. */
     613         343 :                 wake_up(&caching_ctl->wait);
     614         343 :                 if (ret == 1) {
     615         135 :                         put_caching_control(caching_ctl);
     616         135 :                         free_excluded_extents(fs_info->extent_root, cache);
     617         135 :                         return 0;
     618             :                 }
     619             :         } else {
     620             :                 /*
     621             :                  * We are not going to do the fast caching, set cached to the
     622             :                  * appropriate value and wakeup any waiters.
     623             :                  */
     624             :                 spin_lock(&cache->lock);
     625           0 :                 if (load_cache_only) {
     626           0 :                         cache->caching_ctl = NULL;
     627           0 :                         cache->cached = BTRFS_CACHE_NO;
     628             :                 } else {
     629           0 :                         cache->cached = BTRFS_CACHE_STARTED;
     630             :                 }
     631             :                 spin_unlock(&cache->lock);
     632           0 :                 wake_up(&caching_ctl->wait);
     633             :         }
     634             : 
     635         208 :         if (load_cache_only) {
     636           2 :                 put_caching_control(caching_ctl);
     637           2 :                 return 0;
     638             :         }
     639             : 
                               /*
                                * Publish the control on fs_info->caching_block_groups under
                                * commit_root_sem, taking one more reference on it first.
                                */
     640         206 :         down_write(&fs_info->commit_root_sem);
     641         206 :         atomic_inc(&caching_ctl->count);
     642         206 :         list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
     643         206 :         up_write(&fs_info->commit_root_sem);
     644             : 
                               /*
                                * NOTE(review): presumably balanced by the btrfs_put_block_group()
                                * at the end of the caching worker -- confirm.
                                */
     645             :         btrfs_get_block_group(cache);
     646             : 
     647         206 :         btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work);
     648             : 
     649         206 :         return ret;
     650             : }
     651             : 
     652             : /*
     653             :  * return the block group that starts at or after bytenr
                        *
                        * Thin wrapper: forwards @info and @bytenr to
                        * block_group_cache_tree_search() with a third argument of 0 and
                        * returns its result unchanged.
     654             :  */
     655             : static struct btrfs_block_group_cache *
     656             : btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr)
     657             : {
     658             :         struct btrfs_block_group_cache *cache;
     659             : 
     660       57628 :         cache = block_group_cache_tree_search(info, bytenr, 0);
     661             : 
     662             :         return cache;
     663             : }
     664             : 
     665             : /*
     666             :  * return the block group that contains the given bytenr
                        *
                        * Thin wrapper: forwards @info and @bytenr to
                        * block_group_cache_tree_search() with a third argument of 1 and
                        * returns its result unchanged.
     667             :  */
     668       46216 : struct btrfs_block_group_cache *btrfs_lookup_block_group(
     669             :                                                  struct btrfs_fs_info *info,
     670             :                                                  u64 bytenr)
     671             : {
     672             :         struct btrfs_block_group_cache *cache;
     673             : 
     674      422334 :         cache = block_group_cache_tree_search(info, bytenr, 1);
     675             : 
     676       46217 :         return cache;
     677             : }
     678             : 
                       /*
                        * Find the space info matching a set of block group flags.
                        *
                        * @flags is first reduced to its BTRFS_BLOCK_GROUP_TYPE_MASK bits.  The
                        * info->space_info list is then walked under rcu_read_lock() and the
                        * first entry whose flags share at least one bit with the masked value
                        * is returned.  Returns NULL when no entry matches.
                        */
     679             : static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
     680             :                                                   u64 flags)
     681             : {
     682      215939 :         struct list_head *head = &info->space_info;
     683             :         struct btrfs_space_info *found;
     684             : 
     685      200290 :         flags &= BTRFS_BLOCK_GROUP_TYPE_MASK;
     686             : 
     687             :         rcu_read_lock();
     688      311267 :         list_for_each_entry_rcu(found, head, list) {
     689      310610 :                 if (found->flags & flags) {
     690             :                         rcu_read_unlock();
     691             :                         return found;
     692             :                 }
     693             :         }
     694             :         rcu_read_unlock();
     695             :         return NULL;
     696             : }
     697             : 
     698             : /*
     699             :  * after adding space to the filesystem, we need to clear the full flags
     700             :  * on all the space infos.
                        *
                        * Walks info->space_info under rcu_read_lock() and sets ->full to 0 on
                        * every entry.
     701             :  */
     702           0 : void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
     703             : {
     704          72 :         struct list_head *head = &info->space_info;
     705             :         struct btrfs_space_info *found;
     706             : 
     707             :         rcu_read_lock();
     708         288 :         list_for_each_entry_rcu(found, head, list)
     709         216 :                 found->full = 0;
     710             :         rcu_read_unlock();
     711           0 : }
     712             : 
     713             : /* simple helper to search for an existing extent at a given offset */
                       /*
                        * Searches the extent root for the key (@start, BTRFS_EXTENT_ITEM_KEY,
                        * @len) without a transaction and without reserving or modifying
                        * anything (the last two btrfs_search_slot() arguments are 0).
                        *
                        * Returns -ENOMEM if no path can be allocated, otherwise the result of
                        * btrfs_search_slot(), except that a positive result is turned into 0
                        * when the item at the returned slot has objectid @start and type
                        * BTRFS_METADATA_ITEM_KEY.
                        */
     714           0 : int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
     715             : {
     716             :         int ret;
     717             :         struct btrfs_key key;
     718             :         struct btrfs_path *path;
     719             : 
     720           0 :         path = btrfs_alloc_path();
     721           0 :         if (!path)
     722             :                 return -ENOMEM;
     723             : 
     724           0 :         key.objectid = start;
     725           0 :         key.offset = len;
     726           0 :         key.type = BTRFS_EXTENT_ITEM_KEY;
     727           0 :         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
     728             :                                 0, 0);
     729           0 :         if (ret > 0) {
                                       /*
                                        * NOTE(review): path->slots[0] is read without first being
                                        * compared against the number of items in the leaf --
                                        * confirm the slot is always valid after an inexact match.
                                        */
     730           0 :                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
     731           0 :                 if (key.objectid == start &&
     732           0 :                     key.type == BTRFS_METADATA_ITEM_KEY)
     733             :                         ret = 0;
     734             :         }
     735           0 :         btrfs_free_path(path);
     736           0 :         return ret;
     737             : }
     738             : 
     739             : /*
     740             :  * helper function to lookup reference count and flags of a tree block.
     741             :  *
     742             :  * the head node for delayed ref is used to store the sum of all the
     743             :  * reference count modifications queued up in the rbtree. the head
     744             :  * node may also store the extent flags to set. This way you can check
     745             :  * to see what the reference count and extent flags would be if all of
     746             :  * the delayed refs are not processed.
                        *
                        * @trans:    transaction handle; when NULL the commit root is searched
                        *            with locking skipped and delayed refs are not consulted
                        * @root:     supplies fs_info (extent root, incompat flags) and leafsize
                        * @bytenr:   objectid of the extent key to look up
                        * @offset:   offset of the extent key; replaced by root->leafsize when
                        *            @metadata is set but SKINNY_METADATA is not enabled
                        * @metadata: non-zero to search for a BTRFS_METADATA_ITEM_KEY item
                        * @refs:     optional output for the reference count
                        * @flags:    optional output for the extent flags
                        *
                        * Returns 0 on success (also when no extent item is found, in which
                        * case the on-disk count and flags are taken as 0), -ENOMEM if no path
                        * can be allocated, or the negative value from btrfs_search_slot().
                        * @refs and @flags are not written when a negative value is returned.
     747             :  */
     748        1418 : int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
     749             :                              struct btrfs_root *root, u64 bytenr,
     750             :                              u64 offset, int metadata, u64 *refs, u64 *flags)
     751             : {
     752             :         struct btrfs_delayed_ref_head *head;
     753             :         struct btrfs_delayed_ref_root *delayed_refs;
     754             :         struct btrfs_path *path;
     755             :         struct btrfs_extent_item *ei;
     756             :         struct extent_buffer *leaf;
     757             :         struct btrfs_key key;
     758             :         u32 item_size;
     759             :         u64 num_refs;
     760             :         u64 extent_flags;
     761             :         int ret;
     762             : 
     763             :         /*
     764             :          * If we don't have skinny metadata, don't bother doing anything
     765             :          * different
     766             :          */
     767        2836 :         if (metadata && !btrfs_fs_incompat(root->fs_info, SKINNY_METADATA)) {
     768        1418 :                 offset = root->leafsize;
     769             :                 metadata = 0;
     770             :         }
     771             : 
     772        1418 :         path = btrfs_alloc_path();
     773        1418 :         if (!path)
     774             :                 return -ENOMEM;
     775             : 
                               /* No transaction: read the commit root and skip tree locking. */
     776        1418 :         if (!trans) {
     777           1 :                 path->skip_locking = 1;
     778           1 :                 path->search_commit_root = 1;
     779             :         }
     780             : 
     781             : search_again:
     782        1418 :         key.objectid = bytenr;
     783        1418 :         key.offset = offset;
     784        1418 :         if (metadata)
     785           0 :                 key.type = BTRFS_METADATA_ITEM_KEY;
     786             :         else
     787        1418 :                 key.type = BTRFS_EXTENT_ITEM_KEY;
     788             : 
     789             : again:
     790        1418 :         ret = btrfs_search_slot(trans, root->fs_info->extent_root,
     791             :                                 &key, path, 0, 0);
     792        1418 :         if (ret < 0)
     793             :                 goto out_free;
     794             : 
                               /*
                                * The metadata item was not found.  Check whether the previous
                                * slot holds an EXTENT_ITEM for the same bytenr with offset
                                * root->leafsize; if not, search again with that EXTENT_ITEM key.
                                */
     795        1418 :         if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) {
     796           0 :                 if (path->slots[0]) {
     797           0 :                         path->slots[0]--;
     798           0 :                         btrfs_item_key_to_cpu(path->nodes[0], &key,
     799             :                                               path->slots[0]);
     800           0 :                         if (key.objectid == bytenr &&
     801           0 :                             key.type == BTRFS_EXTENT_ITEM_KEY &&
     802           0 :                             key.offset == root->leafsize)
     803             :                                 ret = 0;
     804             :                 }
     805           0 :                 if (ret) {
     806           0 :                         key.objectid = bytenr;
     807           0 :                         key.type = BTRFS_EXTENT_ITEM_KEY;
     808           0 :                         key.offset = root->leafsize;
     809           0 :                         btrfs_release_path(path);
     810           0 :                         goto again;
     811             :                 }
     812             :         }
     813             : 
     814        1418 :         if (ret == 0) {
     815        1290 :                 leaf = path->nodes[0];
     816        1290 :                 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
     817        1290 :                 if (item_size >= sizeof(*ei)) {
     818        2580 :                         ei = btrfs_item_ptr(leaf, path->slots[0],
     819             :                                             struct btrfs_extent_item);
     820             :                         num_refs = btrfs_extent_refs(leaf, ei);
     821             :                         extent_flags = btrfs_extent_flags(leaf, ei);
     822             :                 } else {
                                               /*
                                                * Item smaller than the current extent item: only
                                                * accepted as a v0 item when compat support is
                                                * compiled in, otherwise BUG().
                                                */
     823             : #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
     824             :                         struct btrfs_extent_item_v0 *ei0;
     825           0 :                         BUG_ON(item_size != sizeof(*ei0));
     826           0 :                         ei0 = btrfs_item_ptr(leaf, path->slots[0],
     827             :                                              struct btrfs_extent_item_v0);
     828           0 :                         num_refs = btrfs_extent_refs_v0(leaf, ei0);
     829             :                         /* FIXME: this isn't correct for data */
     830             :                         extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
     831             : #else
     832             :                         BUG();
     833             : #endif
     834             :                 }
     835        1290 :                 BUG_ON(num_refs == 0);
     836             :         } else {
                                       /* No item found: start from zero and report success. */
     837             :                 num_refs = 0;
     838             :                 extent_flags = 0;
     839             :                 ret = 0;
     840             :         }
     841             : 
                               /* Delayed refs are only reachable through a transaction. */
     842        1418 :         if (!trans)
     843             :                 goto out;
     844             : 
     845        1417 :         delayed_refs = &trans->transaction->delayed_refs;
     846             :         spin_lock(&delayed_refs->lock);
     847        1417 :         head = btrfs_find_delayed_ref_head(trans, bytenr);
     848        1417 :         if (head) {
     849         337 :                 if (!mutex_trylock(&head->mutex)) {
                                               /*
                                                * Pin the head with a reference so it stays valid
                                                * after delayed_refs->lock is dropped.
                                                */
     850           0 :                         atomic_inc(&head->node.refs);
     851             :                         spin_unlock(&delayed_refs->lock);
     852             : 
     853           0 :                         btrfs_release_path(path);
     854             : 
     855             :                         /*
     856             :                          * Mutex was contended, block until it's released and try
     857             :                          * again
     858             :                          */
     859           0 :                         mutex_lock(&head->mutex);
     860           0 :                         mutex_unlock(&head->mutex);
     861           0 :                         btrfs_put_delayed_ref(&head->node);
     862           0 :                         goto search_again;
     863             :                 }
     864             :                 spin_lock(&head->lock);
                                       /*
                                        * Fold the pending changes recorded on the head into the
                                        * on-disk values: flags to set, then the ref count delta.
                                        */
     865         337 :                 if (head->extent_op && head->extent_op->update_flags)
     866         135 :                         extent_flags |= head->extent_op->flags_to_set;
     867             :                 else
     868         202 :                         BUG_ON(num_refs == 0);
     869             : 
     870         337 :                 num_refs += head->node.ref_mod;
     871             :                 spin_unlock(&head->lock);
     872         337 :                 mutex_unlock(&head->mutex);
     873             :         }
     874             :         spin_unlock(&delayed_refs->lock);
     875             : out:
     876        1418 :         WARN_ON(num_refs == 0);
     877        1418 :         if (refs)
     878        1417 :                 *refs = num_refs;
     879        1418 :         if (flags)
     880        1418 :                 *flags = extent_flags;
     881             : out_free:
     882        1418 :         btrfs_free_path(path);
     883        1418 :         return ret;
     884             : }
     885             : 
     886             : /*
     887             :  * Back reference rules.  Back refs have three main goals:
     888             :  *
     889             :  * 1) differentiate between all holders of references to an extent so that
     890             :  *    when a reference is dropped we can make sure it was a valid reference
     891             :  *    before freeing the extent.
     892             :  *
     893             :  * 2) Provide enough information to quickly find the holders of an extent
     894             :  *    if we notice a given block is corrupted or bad.
     895             :  *
     896             :  * 3) Make it easy to migrate blocks for FS shrinking or storage pool
     897             :  *    maintenance.  This is actually the same as #2, but with a slightly
     898             :  *    different use case.
     899             :  *
     900             :  * There are two kinds of back refs. The implicit back refs is optimized
     901             :  * for pointers in non-shared tree blocks. For a given pointer in a block,
     902             :  * back refs of this kind provide information about the block's owner tree
     903             :  * and the pointer's key. This information allows us to find the block by
     904             :  * b-tree searching. The full back refs is for pointers in tree blocks not
     905             :  * referenced by their owner trees. The location of tree block is recorded
     906             :  * in the back refs. Actually the full back refs is generic, and can be
     907             :  * used in all cases the implicit back refs is used. The major shortcoming
     908             :  * of the full back refs is its overhead. Every time a tree block gets
     909             :  * COWed, we have to update back refs entry for all pointers in it.
     910             :  *
     911             :  * For a newly allocated tree block, we use implicit back refs for
     912             :  * pointers in it. This means most tree related operations only involve
     913             :  * implicit back refs. For a tree block created in old transaction, the
     914             :  * only way to drop a reference to it is COW it. So we can detect the
     915             :  * event that tree block loses its owner tree's reference and do the
     916             :  * back refs conversion.
     917             :  *
     918             :  * When a tree block is COW'd through a tree, there are four cases:
     919             :  *
     920             :  * The reference count of the block is one and the tree is the block's
     921             :  * owner tree. Nothing to do in this case.
     922             :  *
     923             :  * The reference count of the block is one and the tree is not the
     924             :  * block's owner tree. In this case, full back refs is used for pointers
     925             :  * in the block. Remove these full back refs, add implicit back refs for
     926             :  * every pointer in the new block.
     927             :  *
     928             :  * The reference count of the block is greater than one and the tree is
     929             :  * the block's owner tree. In this case, implicit back refs is used for
     930             :  * pointers in the block. Add full back refs for every pointer in the
     931             :  * block, increase lower level extents' reference counts. The original
     932             :  * implicit back refs are entailed to the new block.
     933             :  *
     934             :  * The reference count of the block is greater than one and the tree is
     935             :  * not the block's owner tree. Add implicit back refs for every pointer in
     936             :  * the new block, increase lower level extents' reference count.
     937             :  *
     938             :  * Back Reference Key composing:
     939             :  *
     940             :  * The key objectid corresponds to the first byte in the extent,
     941             :  * The key type is used to differentiate between types of back refs.
     942             :  * There are different meanings of the key offset for different types
     943             :  * of back refs.
     944             :  *
     945             :  * File extents can be referenced by:
     946             :  *
     947             :  * - multiple snapshots, subvolumes, or different generations in one subvol
     948             :  * - different files inside a single subvolume
     949             :  * - different offsets inside a file (bookend extents in file.c)
     950             :  *
     951             :  * The extent ref structure for the implicit back refs has fields for:
     952             :  *
     953             :  * - Objectid of the subvolume root
     954             :  * - objectid of the file holding the reference
     955             :  * - original offset in the file
     956             :  * - how many bookend extents
     957             :  *
     958             :  * The key offset for the implicit back refs is hash of the first
     959             :  * three fields.
     960             :  *
     961             :  * The extent ref structure for the full back refs has a field for:
     962             :  *
     963             :  * - number of pointers in the tree leaf
     964             :  *
     965             :  * The key offset for the full back refs is the first byte of
     966             :  * the tree leaf
     967             :  *
     968             :  * When a file extent is allocated, The implicit back refs is used.
     969             :  * the fields are filled in:
     970             :  *
     971             :  *     (root_key.objectid, inode objectid, offset in file, 1)
     972             :  *
     973             :  * When a file extent is removed by file truncation, we find the
     974             :  * corresponding implicit back refs and check the following fields:
     975             :  *
     976             :  *     (btrfs_header_owner(leaf), inode objectid, offset in file)
     977             :  *
     978             :  * Btree extents can be referenced by:
     979             :  *
     980             :  * - Different subvolumes
     981             :  *
     982             :  * Both the implicit back refs and the full back refs for tree blocks
     983             :  * only consist of key. The key offset for the implicit back refs is
     984             :  * objectid of block's owner tree. The key offset for the full back refs
     985             :  * is the first byte of parent block.
     986             :  *
     987             :  * When implicit back refs is used, information about the lowest key and
     988             :  * level of the tree block are required. This information is stored in
     989             :  * tree block info structure.
     990             :  */
     991             : 
     992             : #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
                       /*
                        * Convert the v0 extent item that @path points at into a current
                        * struct btrfs_extent_item, in place.
                        *
                        * The reference count is read from the v0 item.  If @owner is (u64)-1
                        * the following items are scanned for a BTRFS_EXTENT_REF_V0_KEY item
                        * with the same objectid and the owner is taken from it.  The path is
                        * then released, the item is looked up again with room reserved for the
                        * size difference plus @extra_size, extended, and filled in.  Owners
                        * below BTRFS_FIRST_FREE_OBJECTID get tree-block flags plus a zeroed
                        * tree block info whose level is set from @owner; all other owners get
                        * BTRFS_EXTENT_FLAG_DATA.
                        *
                        * Returns 0 on success or a negative value from btrfs_next_leaf() or
                        * btrfs_search_slot().
                        */
     993           0 : static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
     994             :                                   struct btrfs_root *root,
     995             :                                   struct btrfs_path *path,
     996             :                                   u64 owner, u32 extra_size)
     997             : {
     998             :         struct btrfs_extent_item *item;
     999             :         struct btrfs_extent_item_v0 *ei0;
    1000             :         struct btrfs_extent_ref_v0 *ref0;
    1001             :         struct btrfs_tree_block_info *bi;
    1002           0 :         struct extent_buffer *leaf;
    1003             :         struct btrfs_key key;
    1004             :         struct btrfs_key found_key;
    1005             :         u32 new_size = sizeof(*item);
    1006             :         u64 refs;
    1007             :         int ret;
    1008             : 
    1009           0 :         leaf = path->nodes[0];
    1010           0 :         BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0));
    1011             : 
    1012           0 :         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
    1013           0 :         ei0 = btrfs_item_ptr(leaf, path->slots[0],
    1014             :                              struct btrfs_extent_item_v0);
    1015           0 :         refs = btrfs_extent_refs_v0(leaf, ei0);
    1016             : 
                               /* Owner unknown: walk forward to the first v0 ref item to get it. */
    1017           0 :         if (owner == (u64)-1) {
    1018             :                 while (1) {
    1019           0 :                         if (path->slots[0] >= btrfs_header_nritems(leaf)) {
    1020           0 :                                 ret = btrfs_next_leaf(root, path);
    1021           0 :                                 if (ret < 0)
    1022             :                                         return ret;
    1023           0 :                                 BUG_ON(ret > 0); /* Corruption */
    1024           0 :                                 leaf = path->nodes[0];
    1025             :                         }
    1026           0 :                         btrfs_item_key_to_cpu(leaf, &found_key,
    1027             :                                               path->slots[0]);
    1028           0 :                         BUG_ON(key.objectid != found_key.objectid);
    1029           0 :                         if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) {
    1030           0 :                                 path->slots[0]++;
    1031           0 :                                 continue;
    1032             :                         }
    1033           0 :                         ref0 = btrfs_item_ptr(leaf, path->slots[0],
    1034             :                                               struct btrfs_extent_ref_v0);
    1035             :                         owner = btrfs_ref_objectid_v0(leaf, ref0);
    1036           0 :                         break;
    1037           0 :                 }
    1038             :         }
    1039           0 :         btrfs_release_path(path);
    1040             : 
    1041           0 :         if (owner < BTRFS_FIRST_FREE_OBJECTID)
    1042             :                 new_size += sizeof(*bi);
    1043             : 
                               /* From here on new_size is the number of bytes to grow the item by. */
    1044           0 :         new_size -= sizeof(*ei0);
    1045           0 :         ret = btrfs_search_slot(trans, root, &key, path,
    1046           0 :                                 new_size + extra_size, 1);
    1047           0 :         if (ret < 0)
    1048             :                 return ret;
    1049           0 :         BUG_ON(ret); /* Corruption */
    1050             : 
    1051           0 :         btrfs_extend_item(root, path, new_size);
    1052             : 
    1053           0 :         leaf = path->nodes[0];
    1054           0 :         item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
    1055             :         btrfs_set_extent_refs(leaf, item, refs);
    1056             :         /* FIXME: get real generation */
    1057             :         btrfs_set_extent_generation(leaf, item, 0);
    1058           0 :         if (owner < BTRFS_FIRST_FREE_OBJECTID) {
    1059             :                 btrfs_set_extent_flags(leaf, item,
    1060             :                                        BTRFS_EXTENT_FLAG_TREE_BLOCK |
    1061             :                                        BTRFS_BLOCK_FLAG_FULL_BACKREF);
                                       /* The tree block info sits directly after the extent item. */
    1062           0 :                 bi = (struct btrfs_tree_block_info *)(item + 1);
    1063             :                 /* FIXME: get first key of the block */
    1064           0 :                 memset_extent_buffer(leaf, 0, (unsigned long)bi, sizeof(*bi));
    1065           0 :                 btrfs_set_tree_block_level(leaf, bi, (int)owner);
    1066             :         } else {
    1067             :                 btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA);
    1068             :         }
    1069           0 :         btrfs_mark_buffer_dirty(leaf);
    1070           0 :         return 0;
    1071             : }
    1072             : #endif
    1073             : 
    1074        1598 : static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
    1075             : {
                               /*
                                * Fold (root_objectid, owner, offset) into one 64-bit value.  The
                                * callers in this file use the result as key.offset of a
                                * BTRFS_EXTENT_DATA_REF_KEY item.
                                *
                                * Two CRC accumulators are used, both seeded with all ones:
                                * high_crc covers root_objectid, low_crc covers owner and then
                                * offset.  Each value is converted with cpu_to_le64() before it is
                                * fed to btrfs_crc32c().
                                */
    1076             :         u32 high_crc = ~(u32)0;
    1077             :         u32 low_crc = ~(u32)0;
    1078             :         __le64 lenum;
    1079             : 
    1080        1598 :         lenum = cpu_to_le64(root_objectid);
    1081        1598 :         high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
    1082        1598 :         lenum = cpu_to_le64(owner);
    1083        1598 :         low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
    1084        1598 :         lenum = cpu_to_le64(offset);
    1085        1598 :         low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
    1086             : 
                               /*
                                * The shift is 31, not 32, so bit 31 of the result mixes both
                                * CRCs.  NOTE(review): this value is inserted into the tree as a
                                * key offset, so it is presumably part of the on-disk format and
                                * the formula must stay as is -- confirm before touching it.
                                */
    1087        1598 :         return ((u64)high_crc << 31) ^ (u64)low_crc;
    1088             : }
    1089             : 
    1090         799 : static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
    1091             :                                      struct btrfs_extent_data_ref *ref)
    1092             : {
                               /*
                                * Same hash as hash_extent_data_ref(), but the root, objectid and
                                * offset are read from the extent data ref @ref stored in @leaf.
                                */
    1093         799 :         return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
    1094             :                                     btrfs_extent_data_ref_objectid(leaf, ref),
    1095             :                                     btrfs_extent_data_ref_offset(leaf, ref));
    1096             : }
    1097             : 
    1098       19777 : static int match_extent_data_ref(struct extent_buffer *leaf,
    1099             :                                  struct btrfs_extent_data_ref *ref,
    1100             :                                  u64 root_objectid, u64 owner, u64 offset)
    1101             : {
                               /*
                                * Compare the root, objectid and offset stored in @ref (inside
                                * @leaf) with the values passed in.
                                *
                                * Returns 1 when all three fields are equal, 0 as soon as one of
                                * them differs.
                                */
    1102       38786 :         if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
    1103       18984 :             btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
    1104             :             btrfs_extent_data_ref_offset(leaf, ref) != offset)
    1105             :                 return 0;
    1106             :         return 1;
    1107             : }
    1108             : 
    1109           0 : static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
    1110             :                                            struct btrfs_root *root,
    1111             :                                            struct btrfs_path *path,
    1112             :                                            u64 bytenr, u64 parent,
    1113             :                                            u64 root_objectid,
    1114             :                                            u64 owner, u64 offset)
    1115             : {
                               /*
                                * Find the stand-alone data back reference item of the extent at
                                * @bytenr and leave @path pointing at it.
                                *
                                * A non-zero @parent selects a BTRFS_SHARED_DATA_REF_KEY item
                                * keyed by @parent; otherwise a BTRFS_EXTENT_DATA_REF_KEY item is
                                * looked up by the hash of (@root_objectid, @owner, @offset).
                                *
                                * Returns 0 when the item was found, -ENOENT when it was not, or
                                * the negative value reported by btrfs_search_slot() or
                                * btrfs_next_leaf().
                                */
    1116             :         struct btrfs_key key;
    1117             :         struct btrfs_extent_data_ref *ref;
    1118           0 :         struct extent_buffer *leaf;
    1119             :         u32 nritems;
    1120             :         int ret;
    1121             :         int recow;
    1122             :         int err = -ENOENT;
    1123             : 
    1124           0 :         key.objectid = bytenr;
    1125           0 :         if (parent) {
    1126           0 :                 key.type = BTRFS_SHARED_DATA_REF_KEY;
    1127           0 :                 key.offset = parent;
    1128             :         } else {
    1129           0 :                 key.type = BTRFS_EXTENT_DATA_REF_KEY;
    1130           0 :                 key.offset = hash_extent_data_ref(root_objectid,
    1131             :                                                   owner, offset);
    1132             :         }
    1133             : again:
    1134             :         recow = 0;
                               /*
                                * NOTE(review): the trailing -1, 1 are presumably the ins_len and
                                * cow arguments of btrfs_search_slot() -- confirm against its
                                * definition.
                                */
    1135           0 :         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
    1136           0 :         if (ret < 0) {
    1137             :                 err = ret;
    1138             :                 goto fail;
    1139             :         }
    1140             : 
    1141           0 :         if (parent) {
                                       /* For a shared ref only an exact key match counts. */
    1142           0 :                 if (!ret)
    1143             :                         return 0;
    1144             : #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
                                       /* Retry with the v0 key type, same objectid and offset. */
    1145           0 :                 key.type = BTRFS_EXTENT_REF_V0_KEY;
    1146           0 :                 btrfs_release_path(path);
    1147           0 :                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
    1148           0 :                 if (ret < 0) {
    1149             :                         err = ret;
    1150             :                         goto fail;
    1151             :                 }
    1152           0 :                 if (!ret)
    1153             :                         return 0;
    1154             : #endif
                                       /* Nothing found: err still holds -ENOENT here. */
    1155             :                 goto fail;
    1156             :         }
    1157             : 
                               /*
                                * insert_extent_data_ref() bumps key.offset when the hashed slot
                                * is already taken by a different ref, so the wanted item may sit
                                * after the hashed position.  Walk forward from the search result
                                * and compare the stored fields.
                                */
    1158           0 :         leaf = path->nodes[0];
    1159             :         nritems = btrfs_header_nritems(leaf);
    1160             :         while (1) {
    1161           0 :                 if (path->slots[0] >= nritems) {
    1162           0 :                         ret = btrfs_next_leaf(root, path);
                                               /*
                                                * ret < 0 is passed up; ret > 0 leaves err at
                                                * -ENOENT.
                                                */
    1163           0 :                         if (ret < 0)
    1164             :                                 err = ret;
    1165           0 :                         if (ret)
    1166             :                                 goto fail;
    1167             : 
    1168           0 :                         leaf = path->nodes[0];
    1169             :                         nritems = btrfs_header_nritems(leaf);
    1170             :                         recow = 1;
    1171             :                 }
    1172             : 
                                       /* Note: this overwrites the search key built above. */
    1173           0 :                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
    1174           0 :                 if (key.objectid != bytenr ||
    1175           0 :                     key.type != BTRFS_EXTENT_DATA_REF_KEY)
    1176             :                         goto fail;
    1177             : 
    1178           0 :                 ref = btrfs_item_ptr(leaf, path->slots[0],
    1179             :                                      struct btrfs_extent_data_ref);
    1180             : 
    1181           0 :                 if (match_extent_data_ref(leaf, ref, root_objectid,
    1182             :                                           owner, offset)) {
                                               /*
                                                * The match lives in a leaf reached through
                                                * btrfs_next_leaf().  Drop the path and search
                                                * again; key now holds the key of the matching
                                                * item, so the new search targets that exact item.
                                                * NOTE(review): presumably needed because that
                                                * leaf was not prepared by btrfs_search_slot()
                                                * for modification -- confirm.
                                                */
    1183           0 :                         if (recow) {
    1184           0 :                                 btrfs_release_path(path);
    1185           0 :                                 goto again;
    1186             :                         }
    1187             :                         err = 0;
    1188             :                         break;
    1189             :                 }
    1190           0 :                 path->slots[0]++;
    1191           0 :         }
    1192             : fail:
    1193           0 :         return err;
    1194             : }
    1195             : 
    1196           0 : static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
    1197             :                                            struct btrfs_root *root,
    1198             :                                            struct btrfs_path *path,
    1199             :                                            u64 bytenr, u64 parent,
    1200             :                                            u64 root_objectid, u64 owner,
    1201             :                                            u64 offset, int refs_to_add)
    1202             : {
                               /*
                                * Add @refs_to_add references to the stand-alone data back ref
                                * item of the extent at @bytenr, creating the item when it does
                                * not exist yet.
                                *
                                * A non-zero @parent selects a shared data ref keyed by @parent;
                                * otherwise an extent data ref keyed by the hash of
                                * (@root_objectid, @owner, @offset) is used.
                                *
                                * @path is always released before returning.  Returns 0 on
                                * success or the negative value reported by
                                * btrfs_insert_empty_item().
                                */
    1203             :         struct btrfs_key key;
    1204             :         struct extent_buffer *leaf;
    1205             :         u32 size;
    1206             :         u32 num_refs;
    1207             :         int ret;
    1208             : 
    1209           0 :         key.objectid = bytenr;
    1210           0 :         if (parent) {
    1211           0 :                 key.type = BTRFS_SHARED_DATA_REF_KEY;
    1212           0 :                 key.offset = parent;
    1213             :                 size = sizeof(struct btrfs_shared_data_ref);
    1214             :         } else {
    1215           0 :                 key.type = BTRFS_EXTENT_DATA_REF_KEY;
    1216           0 :                 key.offset = hash_extent_data_ref(root_objectid,
    1217             :                                                   owner, offset);
    1218             :                 size = sizeof(struct btrfs_extent_data_ref);
    1219             :         }
    1220             : 
                               /* -EEXIST is not an error here: the item is updated in place. */
    1221             :         ret = btrfs_insert_empty_item(trans, root, path, &key, size);
    1222           0 :         if (ret && ret != -EEXIST)
    1223             :                 goto fail;
    1224             : 
    1225           0 :         leaf = path->nodes[0];
    1226           0 :         if (parent) {
    1227             :                 struct btrfs_shared_data_ref *ref;
    1228           0 :                 ref = btrfs_item_ptr(leaf, path->slots[0],
    1229             :                                      struct btrfs_shared_data_ref);
                                       /* ret == 0: fresh item; otherwise bump the stored count. */
    1230           0 :                 if (ret == 0) {
    1231           0 :                         btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
    1232             :                 } else {
    1233             :                         num_refs = btrfs_shared_data_ref_count(leaf, ref);
    1234           0 :                         num_refs += refs_to_add;
    1235             :                         btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
    1236             :                 }
    1237             :         } else {
    1238             :                 struct btrfs_extent_data_ref *ref;
                                       /*
                                        * An existing item at this key is only ours if its stored
                                        * fields match.  If they do not, the key is occupied by a
                                        * different ref: release the path, move to the next
                                        * key.offset and try the insertion again.
                                        */
    1239           0 :                 while (ret == -EEXIST) {
    1240           0 :                         ref = btrfs_item_ptr(leaf, path->slots[0],
    1241             :                                              struct btrfs_extent_data_ref);
    1242           0 :                         if (match_extent_data_ref(leaf, ref, root_objectid,
    1243             :                                                   owner, offset))
    1244             :                                 break;
    1245           0 :                         btrfs_release_path(path);
    1246           0 :                         key.offset++;
    1247             :                         ret = btrfs_insert_empty_item(trans, root, path, &key,
    1248             :                                                       size);
    1249           0 :                         if (ret && ret != -EEXIST)
    1250             :                                 goto fail;
    1251             : 
    1252           0 :                         leaf = path->nodes[0];
    1253             :                 }
                                       /*
                                        * Here ret is 0 (a new, empty item was created) or
                                        * -EEXIST (the matching item was found).
                                        */
    1254           0 :                 ref = btrfs_item_ptr(leaf, path->slots[0],
    1255             :                                      struct btrfs_extent_data_ref);
    1256           0 :                 if (ret == 0) {
    1257             :                         btrfs_set_extent_data_ref_root(leaf, ref,
    1258             :                                                        root_objectid);
    1259             :                         btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
    1260             :                         btrfs_set_extent_data_ref_offset(leaf, ref, offset);
    1261           0 :                         btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
    1262             :                 } else {
    1263             :                         num_refs = btrfs_extent_data_ref_count(leaf, ref);
    1264           0 :                         num_refs += refs_to_add;
    1265             :                         btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
    1266             :                 }
    1267             :         }
    1268           0 :         btrfs_mark_buffer_dirty(leaf);
                               /* Clear a possible -EEXIST left over from the insertion. */
    1269             :         ret = 0;
    1270             : fail:
    1271           0 :         btrfs_release_path(path);
    1272           0 :         return ret;
    1273             : }
    1274             : 
    1275           0 : static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
    1276             :                                            struct btrfs_root *root,
    1277             :                                            struct btrfs_path *path,
    1278             :                                            int refs_to_drop, int *last_ref)
    1279             : {
                               /*
                                * Drop @refs_to_drop references from the data back ref item that
                                * @path currently points at.  The item may be an extent data ref,
                                * a shared data ref or, with BTRFS_COMPAT_EXTENT_TREE_V0, a v0
                                * ref; any other key type hits BUG().
                                *
                                * When the count reaches zero the item is deleted and *@last_ref
                                * is set to 1.  Otherwise the reduced count is written back, the
                                * leaf is marked dirty and *@last_ref is left untouched.
                                *
                                * Returns the result of btrfs_del_item() when the item is
                                * removed, 0 otherwise.
                                */
    1280             :         struct btrfs_key key;
    1281             :         struct btrfs_extent_data_ref *ref1 = NULL;
    1282             :         struct btrfs_shared_data_ref *ref2 = NULL;
    1283             :         struct extent_buffer *leaf;
    1284             :         u32 num_refs = 0;
    1285             :         int ret = 0;
    1286             : 
    1287           0 :         leaf = path->nodes[0];
    1288           0 :         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
    1289             : 
                               /* Read the current count; the item layout depends on the key type. */
    1290           0 :         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
    1291           0 :                 ref1 = btrfs_item_ptr(leaf, path->slots[0],
    1292             :                                       struct btrfs_extent_data_ref);
    1293             :                 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
    1294           0 :         } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
    1295           0 :                 ref2 = btrfs_item_ptr(leaf, path->slots[0],
    1296             :                                       struct btrfs_shared_data_ref);
    1297             :                 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
    1298             : #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
    1299           0 :         } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
    1300             :                 struct btrfs_extent_ref_v0 *ref0;
    1301           0 :                 ref0 = btrfs_item_ptr(leaf, path->slots[0],
    1302             :                                       struct btrfs_extent_ref_v0);
    1303             :                 num_refs = btrfs_ref_count_v0(leaf, ref0);
    1304             : #endif
    1305             :         } else {
    1306           0 :                 BUG();
    1307             :         }
    1308             : 
                               /* Dropping more references than are recorded is treated as fatal. */
    1309           0 :         BUG_ON(num_refs < refs_to_drop);
    1310           0 :         num_refs -= refs_to_drop;
    1311             : 
    1312           0 :         if (num_refs == 0) {
    1313             :                 ret = btrfs_del_item(trans, root, path);
    1314           0 :                 *last_ref = 1;
    1315             :         } else {
    1316           0 :                 if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
    1317             :                         btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
    1318           0 :                 else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
    1319             :                         btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
    1320             : #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
                                       /* Only the v0 key type can reach this branch (see BUG() above). */
    1321             :                 else {
    1322             :                         struct btrfs_extent_ref_v0 *ref0;
    1323           0 :                         ref0 = btrfs_item_ptr(leaf, path->slots[0],
    1324             :                                         struct btrfs_extent_ref_v0);
    1325             :                         btrfs_set_ref_count_v0(leaf, ref0, num_refs);
    1326             :                 }
    1327             : #endif
    1328           0 :                 btrfs_mark_buffer_dirty(leaf);
    1329             :         }
    1330           0 :         return ret;
    1331             : }
    1332             : 
    1333       15648 : static noinline u32 extent_data_ref_count(struct btrfs_root *root,
    1334             :                                           struct btrfs_path *path,
    1335             :                                           struct btrfs_extent_inline_ref *iref)
    1336             : {
                               /*
                                * Return the reference count stored in a data back reference.
                                *
                                * With a non-NULL @iref the count is read from that inline ref;
                                * otherwise it is read from the item @path points at, selected by
                                * the item's key type.  An unexpected key type triggers WARN_ON()
                                * and yields 0.
                                *
                                * @root is not used by the body.
                                */
    1337             :         struct btrfs_key key;
    1338             :         struct extent_buffer *leaf;
    1339             :         struct btrfs_extent_data_ref *ref1;
    1340             :         struct btrfs_shared_data_ref *ref2;
    1341             :         u32 num_refs = 0;
    1342             : 
    1343       15648 :         leaf = path->nodes[0];
    1344       15648 :         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
    1345       15648 :         if (iref) {
                                       /*
                                        * Inline case: an extent data ref is read starting at the
                                        * offset field of the inline ref; any other inline type is
                                        * treated as a shared data ref placed right after the
                                        * inline ref structure.
                                        */
    1346       15648 :                 if (btrfs_extent_inline_ref_type(leaf, iref) ==
    1347             :                     BTRFS_EXTENT_DATA_REF_KEY) {
    1348       13178 :                         ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
    1349             :                         num_refs = btrfs_extent_data_ref_count(leaf, ref1);
    1350             :                 } else {
    1351        2470 :                         ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
    1352             :                         num_refs = btrfs_shared_data_ref_count(leaf, ref2);
    1353             :                 }
    1354           0 :         } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
    1355           0 :                 ref1 = btrfs_item_ptr(leaf, path->slots[0],
    1356             :                                       struct btrfs_extent_data_ref);
    1357             :                 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
    1358           0 :         } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
    1359           0 :                 ref2 = btrfs_item_ptr(leaf, path->slots[0],
    1360             :                                       struct btrfs_shared_data_ref);
    1361             :                 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
    1362             : #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
    1363           0 :         } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
    1364             :                 struct btrfs_extent_ref_v0 *ref0;
    1365           0 :                 ref0 = btrfs_item_ptr(leaf, path->slots[0],
    1366             :                                       struct btrfs_extent_ref_v0);
    1367             :                 num_refs = btrfs_ref_count_v0(leaf, ref0);
    1368             : #endif
    1369             :         } else {
                                       /* Unknown item type: warn and report a count of 0. */
    1370           0 :                 WARN_ON(1);
    1371             :         }
    1372       15648 :         return num_refs;
    1373             : }
    1374             : 
    1375           0 : static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
    1376             :                                           struct btrfs_root *root,
    1377             :                                           struct btrfs_path *path,
    1378             :                                           u64 bytenr, u64 parent,
    1379             :                                           u64 root_objectid)
    1380             : {
                               /*
                                * Search for the stand-alone tree block back reference item of
                                * the block at @bytenr.  A non-zero @parent selects a
                                * BTRFS_SHARED_BLOCK_REF_KEY item keyed by @parent; otherwise a
                                * BTRFS_TREE_BLOCK_REF_KEY item keyed by @root_objectid is used.
                                *
                                * Returns 0 on an exact match, -ENOENT when no such item exists,
                                * or the negative value reported by btrfs_search_slot().
                                */
    1381             :         struct btrfs_key key;
    1382             :         int ret;
    1383             : 
    1384           0 :         key.objectid = bytenr;
    1385           0 :         if (parent) {
    1386           0 :                 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
    1387           0 :                 key.offset = parent;
    1388             :         } else {
    1389           0 :                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
    1390           0 :                 key.offset = root_objectid;
    1391             :         }
    1392             : 
    1393           0 :         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
                               /* A positive result means "no exact match": map it to -ENOENT. */
    1394           0 :         if (ret > 0)
    1395             :                 ret = -ENOENT;
    1396             : #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
                               /* Shared refs only: retry with the v0 key type, same offset. */
    1397           0 :         if (ret == -ENOENT && parent) {
    1398           0 :                 btrfs_release_path(path);
    1399           0 :                 key.type = BTRFS_EXTENT_REF_V0_KEY;
    1400           0 :                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
    1401           0 :                 if (ret > 0)
    1402             :                         ret = -ENOENT;
    1403             :         }
    1404             : #endif
    1405           0 :         return ret;
    1406             : }
    1407             : 
    1408           0 : static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
    1409             :                                           struct btrfs_root *root,
    1410             :                                           struct btrfs_path *path,
    1411             :                                           u64 bytenr, u64 parent,
    1412             :                                           u64 root_objectid)
    1413             : {
                               /*
                                * Insert a stand-alone tree block back reference for the block at
                                * @bytenr.  A non-zero @parent produces a
                                * BTRFS_SHARED_BLOCK_REF_KEY item keyed by @parent; otherwise a
                                * BTRFS_TREE_BLOCK_REF_KEY item keyed by @root_objectid.
                                *
                                * @path is released before returning.  Returns whatever
                                * btrfs_insert_empty_item() returned.
                                */
    1414             :         struct btrfs_key key;
    1415             :         int ret;
    1416             : 
    1417           0 :         key.objectid = bytenr;
    1418           0 :         if (parent) {
    1419           0 :                 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
    1420           0 :                 key.offset = parent;
    1421             :         } else {
    1422           0 :                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
    1423           0 :                 key.offset = root_objectid;
    1424             :         }
    1425             : 
                               /* The item is created with size 0: the key alone carries the ref. */
    1426             :         ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
    1427           0 :         btrfs_release_path(path);
    1428           0 :         return ret;
    1429             : }
    1430             : 
    1431             : static inline int extent_ref_type(u64 parent, u64 owner)
    1432             : {
                               /*
                                * Pick the back reference key type for a (parent, owner) pair.
                                *
                                * An @owner below BTRFS_FIRST_FREE_OBJECTID selects the tree block
                                * types, anything else the data types.  Within each group a
                                * non-zero @parent selects the shared variant.
                                */
    1433             :         int type;
    1434      105106 :         if (owner < BTRFS_FIRST_FREE_OBJECTID) {
    1435       56108 :                 if (parent > 0)
    1436             :                         type = BTRFS_SHARED_BLOCK_REF_KEY;
    1437             :                 else
    1438             :                         type = BTRFS_TREE_BLOCK_REF_KEY;
    1439             :         } else {
    1440       48998 :                 if (parent > 0)
    1441             :                         type = BTRFS_SHARED_DATA_REF_KEY;
    1442             :                 else
    1443             :                         type = BTRFS_EXTENT_DATA_REF_KEY;
    1444             :         }
    1445             :         return type;
    1446             : }
    1447             : 
    1448       16646 : static int find_next_key(struct btrfs_path *path, int level,
    1449             :                          struct btrfs_key *key)
    1450             : 
    1451             : {
                               /*
                                * Copy into @key the key that follows the current position of
                                * @path, starting the search at @level and moving up one level
                                * at a time.
                                *
                                * Returns 0 when a following key was found and stored in @key,
                                * 1 when the walk ran out of levels or reached a level with no
                                * node in @path; @key is not written in that case.
                                */
    1452       34128 :         for (; level < BTRFS_MAX_LEVEL; level++) {
    1453       34033 :                 if (!path->nodes[level])
    1454             :                         break;
                                       /* Current slot is the last one in this node: go up a level. */
    1455       33938 :                 if (path->slots[level] + 1 >=
    1456             :                     btrfs_header_nritems(path->nodes[level]))
    1457         418 :                         continue;
                                       /* Level 0 is read with the item accessor, others with the node one. */
    1458       16551 :                 if (level == 0)
    1459       16231 :                         btrfs_item_key_to_cpu(path->nodes[level], key,
    1460             :                                               path->slots[level] + 1);
    1461             :                 else
    1462             :                         btrfs_node_key_to_cpu(path->nodes[level], key,
    1463             :                                               path->slots[level] + 1);
    1464             :                 return 0;
    1465             :         }
    1466             :         return 1;
    1467             : }
    1468             : 
    1469             : /*
    1470             :  * look for inline back ref. if back ref is found, *ref_ret is set
    1471             :  * to the address of inline back ref, and 0 is returned.
    1472             :  *
    1473             :  * if back ref isn't found, *ref_ret is set to the address where it
    1474             :  * should be inserted, and -ENOENT is returned.
    1475             :  *
    1476             :  * if insert is true and there are too many inline back refs, the path
    1477             :  * points to the extent item, and -EAGAIN is returned.
    1478             :  *
    1479             :  * NOTE: inline back refs are ordered in the same way that back ref
    1480             :  *       items in the tree are ordered.
    1481             :  */
    1482             : static noinline_for_stack
    1483       88585 : int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
    1484             :                                  struct btrfs_root *root,
    1485             :                                  struct btrfs_path *path,
    1486             :                                  struct btrfs_extent_inline_ref **ref_ret,
    1487             :                                  u64 bytenr, u64 num_bytes,
    1488             :                                  u64 parent, u64 root_objectid,
    1489             :                                  u64 owner, u64 offset, int insert)
    1490             : {
    1491             :         struct btrfs_key key;
    1492             :         struct extent_buffer *leaf;
    1493             :         struct btrfs_extent_item *ei;
    1494             :         struct btrfs_extent_inline_ref *iref;
    1495             :         u64 flags;
    1496             :         u64 item_size;
    1497             :         unsigned long ptr;
    1498             :         unsigned long end;
    1499             :         int extra_size;
    1500             :         int type;
    1501             :         int want;
    1502             :         int ret;
    1503             :         int err = 0;
    1504       88585 :         bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
    1505             :                                                  SKINNY_METADATA);
    1506             : 
    1507       88585 :         key.objectid = bytenr;
    1508       88585 :         key.type = BTRFS_EXTENT_ITEM_KEY;
    1509       88585 :         key.offset = num_bytes;
    1510             : 
    1511             :         want = extent_ref_type(parent, owner);
    1512       88585 :         if (insert) {
    1513       17520 :                 extra_size = btrfs_extent_inline_ref_size(want);
    1514       17520 :                 path->keep_locks = 1;
    1515             :         } else
    1516             :                 extra_size = -1;
    1517             : 
    1518             :         /*
    1519             :          * Owner is our parent level, so we can just add one to get the level
    1520             :          * for the block we are interested in.
    1521             :          */
    1522       88585 :         if (skinny_metadata && owner < BTRFS_FIRST_FREE_OBJECTID) {
    1523           0 :                 key.type = BTRFS_METADATA_ITEM_KEY;
    1524           0 :                 key.offset = owner;
    1525             :         }
    1526             : 
    1527             : again:
    1528       88585 :         ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
    1529       88586 :         if (ret < 0) {
    1530             :                 err = ret;
    1531             :                 goto out;
    1532             :         }
    1533             : 
    1534             :         /*
    1535             :          * We may be a newly converted file system which still has the old fat
    1536             :          * extent entries for metadata, so try and see if we have one of those.
    1537             :          */
    1538       88586 :         if (ret > 0 && skinny_metadata) {
    1539             :                 skinny_metadata = false;
    1540           0 :                 if (path->slots[0]) {
    1541           0 :                         path->slots[0]--;
    1542           0 :                         btrfs_item_key_to_cpu(path->nodes[0], &key,
    1543             :                                               path->slots[0]);
    1544           0 :                         if (key.objectid == bytenr &&
    1545           0 :                             key.type == BTRFS_EXTENT_ITEM_KEY &&
    1546           0 :                             key.offset == num_bytes)
    1547             :                                 ret = 0;
    1548             :                 }
    1549           0 :                 if (ret) {
    1550           0 :                         key.objectid = bytenr;
    1551           0 :                         key.type = BTRFS_EXTENT_ITEM_KEY;
    1552           0 :                         key.offset = num_bytes;
    1553           0 :                         btrfs_release_path(path);
    1554           0 :                         goto again;
    1555             :                 }
    1556             :         }
    1557             : 
    1558       88586 :         if (ret && !insert) {
    1559             :                 err = -ENOENT;
    1560             :                 goto out;
    1561       88586 :         } else if (WARN_ON(ret)) {
    1562             :                 err = -EIO;
    1563             :                 goto out;
    1564             :         }
    1565             : 
    1566       88586 :         leaf = path->nodes[0];
    1567      177172 :         item_size = btrfs_item_size_nr(leaf, path->slots[0]);
    1568             : #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
    1569       88586 :         if (item_size < sizeof(*ei)) {
    1570           0 :                 if (!insert) {
    1571             :                         err = -ENOENT;
    1572             :                         goto out;
    1573             :                 }
    1574           0 :                 ret = convert_extent_item_v0(trans, root, path, owner,
    1575             :                                              extra_size);
    1576           0 :                 if (ret < 0) {
    1577             :                         err = ret;
    1578             :                         goto out;
    1579             :                 }
    1580           0 :                 leaf = path->nodes[0];
    1581           0 :                 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
    1582             :         }
    1583             : #endif
    1584       88586 :         BUG_ON(item_size < sizeof(*ei));
    1585             : 
    1586      177172 :         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
    1587             :         flags = btrfs_extent_flags(leaf, ei);
    1588             : 
    1589       88586 :         ptr = (unsigned long)(ei + 1);
    1590       88586 :         end = (unsigned long)ei + item_size;
    1591             : 
    1592       88586 :         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) {
    1593       52697 :                 ptr += sizeof(struct btrfs_tree_block_info);
    1594       52697 :                 BUG_ON(ptr > end);
    1595             :         }
    1596             : 
    1597             :         err = -ENOENT;
    1598             :         while (1) {
    1599      102559 :                 if (ptr >= end) {
    1600        5226 :                         WARN_ON(ptr > end);
    1601             :                         break;
    1602             :                 }
    1603       97333 :                 iref = (struct btrfs_extent_inline_ref *)ptr;
    1604       97333 :                 type = btrfs_extent_inline_ref_type(leaf, iref);
    1605       97333 :                 if (want < type)
    1606             :                         break;
    1607       96498 :                 if (want > type) {
    1608       12436 :                         ptr += btrfs_extent_inline_ref_size(type);
    1609       12436 :                         continue;
    1610             :                 }
    1611             : 
    1612       84062 :                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
    1613             :                         struct btrfs_extent_data_ref *dref;
    1614       19777 :                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
    1615       19777 :                         if (match_extent_data_ref(leaf, dref, root_objectid,
    1616             :                                                   owner, offset)) {
    1617             :                                 err = 0;
    1618             :                                 break;
    1619             :                         }
    1620        1598 :                         if (hash_extent_data_ref_item(leaf, dref) <
    1621         799 :                             hash_extent_data_ref(root_objectid, owner, offset))
    1622             :                                 break;
    1623             :                 } else {
    1624             :                         u64 ref_offset;
    1625             :                         ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
    1626       64285 :                         if (parent > 0) {
    1627       12174 :                                 if (parent == ref_offset) {
    1628             :                                         err = 0;
    1629             :                                         break;
    1630             :                                 }
    1631        7715 :                                 if (ref_offset < parent)
    1632             :                                         break;
    1633             :                         } else {
    1634       52111 :                                 if (root_objectid == ref_offset) {
    1635             :                                         err = 0;
    1636             :                                         break;
    1637             :                                 }
    1638        3482 :                                 if (ref_offset < root_objectid)
    1639             :                                         break;
    1640             :                         }
    1641             :                 }
    1642        1537 :                 ptr += btrfs_extent_inline_ref_size(type);
    1643             :         }
    1644       88587 :         if (err == -ENOENT && insert) {
    1645       33042 :                 if (item_size + extra_size >=
    1646       16521 :                     BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
    1647             :                         err = -EAGAIN;
    1648             :                         goto out;
    1649             :                 }
    1650             :                 /*
    1651             :                  * To add new inline back ref, we have to make sure
    1652             :                  * there is no corresponding back ref item.
    1653             :                  * For simplicity, we just do not add new inline back
    1654             :                  * ref if there is any kind of item for this block
    1655             :                  */
    1656       33036 :                 if (find_next_key(path, 0, &key) == 0 &&
    1657       16540 :                     key.objectid == bytenr &&
    1658          25 :                     key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
    1659             :                         err = -EAGAIN;
    1660             :                         goto out;
    1661             :                 }
    1662             :         }
    1663       88587 :         *ref_ret = (struct btrfs_extent_inline_ref *)ptr;
    1664             : out:
    1665       88587 :         if (insert) {
    1666       17521 :                 path->keep_locks = 0;
    1667       17521 :                 btrfs_unlock_up_safe(path, 1);
    1668             :         }
    1669       88587 :         return err;
    1670             : }
    1671             : 
    1672             : /*
    1673             :  * Helper to add a new inline back ref to the extent item at path->slots[0].
                     :  *
                     :  * @iref is the position inside the item where the new ref is written; it
                     :  * is remembered as a byte offset from the start of the item before the
                     :  * item is extended.  The ref type comes from extent_ref_type(parent,
                     :  * owner) and the item grows by the size of that type.  The item's total
                     :  * ref count is raised by @refs_to_add, @extent_op (if non-NULL) is applied
                     :  * to the item, any bytes after the insertion point are shifted up to open
                     :  * a gap, the new ref is filled in and the leaf is marked dirty.
    1674             :  */
    1675             : static noinline_for_stack
    1676       16521 : void setup_inline_extent_backref(struct btrfs_root *root,
    1677             :                                  struct btrfs_path *path,
    1678             :                                  struct btrfs_extent_inline_ref *iref,
    1679             :                                  u64 parent, u64 root_objectid,
    1680             :                                  u64 owner, u64 offset, int refs_to_add,
    1681             :                                  struct btrfs_delayed_extent_op *extent_op)
    1682             : {
    1683             :         struct extent_buffer *leaf;
    1684             :         struct btrfs_extent_item *ei;
    1685             :         unsigned long ptr;
    1686             :         unsigned long end;
    1687             :         unsigned long item_offset;
    1688             :         u64 refs;
    1689             :         int size;
    1690             :         int type;
    1691             : 
    1692       16521 :         leaf = path->nodes[0];
    1693       33042 :         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
    1694       16521 :         item_offset = (unsigned long)iref - (unsigned long)ei; /* insertion point, relative to the item start */
    1695             : 
    1696             :         type = extent_ref_type(parent, owner);
    1697       16521 :         size = btrfs_extent_inline_ref_size(type);
    1698             : 
    1699       16521 :         btrfs_extend_item(root, path, size);
    1700             : 
    1701       33042 :         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); /* looked up again after the extend; presumably the item data moved -- TODO confirm */
    1702             :         refs = btrfs_extent_refs(leaf, ei);
    1703       16521 :         refs += refs_to_add;
    1704             :         btrfs_set_extent_refs(leaf, ei, refs);
    1705       16521 :         if (extent_op)
    1706         105 :                 __run_delayed_extent_op(extent_op, leaf, ei);
    1707             : 
    1708       16521 :         ptr = (unsigned long)ei + item_offset;
    1709       33042 :         end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]); /* end of the already extended item */
    1710       16521 :         if (ptr < end - size) /* data follows the insertion point: shift it up by size bytes */
    1711       11295 :                 memmove_extent_buffer(leaf, ptr + size, ptr,
    1712             :                                       end - size - ptr);
    1713             : 
    1714       16521 :         iref = (struct btrfs_extent_inline_ref *)ptr;
    1715       16521 :         btrfs_set_extent_inline_ref_type(leaf, iref, type);
    1716       16521 :         if (type == BTRFS_EXTENT_DATA_REF_KEY) {
    1717             :                 struct btrfs_extent_data_ref *dref;
    1718        1399 :                 dref = (struct btrfs_extent_data_ref *)(&iref->offset); /* the data ref is laid over the offset field */
    1719             :                 btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
    1720             :                 btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
    1721             :                 btrfs_set_extent_data_ref_offset(leaf, dref, offset);
    1722        1399 :                 btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
    1723       15122 :         } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
    1724             :                 struct btrfs_shared_data_ref *sref;
    1725       11710 :                 sref = (struct btrfs_shared_data_ref *)(iref + 1); /* the count sits right behind the inline ref */
    1726       11710 :                 btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
    1727             :                 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
    1728        3412 :         } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
    1729             :                 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
    1730             :         } else {
    1731             :                 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid); /* any other type: offset holds the root objectid */
    1732             :         }
    1733       16521 :         btrfs_mark_buffer_dirty(leaf);
    1734       16521 : }
    1735             : /*
                     :  * Find the back ref for an extent, trying the inline form first.
                     :  *
                     :  * lookup_inline_extent_backref() is called with its last argument 0.
                     :  * Any result other than -ENOENT is returned unchanged.  On -ENOENT the
                     :  * path is released, *@ref_ret is set to NULL and a separate back ref
                     :  * item is looked up instead: a tree block ref when @owner is below
                     :  * BTRFS_FIRST_FREE_OBJECTID, an extent data ref otherwise.  The result
                     :  * of that second lookup is returned.
                     :  */
    1736       71064 : static int lookup_extent_backref(struct btrfs_trans_handle *trans,
    1737             :                                  struct btrfs_root *root,
    1738             :                                  struct btrfs_path *path,
    1739             :                                  struct btrfs_extent_inline_ref **ref_ret,
    1740             :                                  u64 bytenr, u64 num_bytes, u64 parent,
    1741             :                                  u64 root_objectid, u64 owner, u64 offset)
    1742             : {
    1743             :         int ret;
    1744             : 
    1745       71064 :         ret = lookup_inline_extent_backref(trans, root, path, ref_ret,
    1746             :                                            bytenr, num_bytes, parent,
    1747             :                                            root_objectid, owner, offset, 0);
    1748       71066 :         if (ret != -ENOENT) /* found inline, or a real error: nothing more to do here */
    1749             :                 return ret;
    1750             : 
    1751           0 :         btrfs_release_path(path);
    1752           0 :         *ref_ret = NULL; /* no inline ref; whatever is found below is a separate item */
    1753             : 
    1754           0 :         if (owner < BTRFS_FIRST_FREE_OBJECTID) {
    1755           0 :                 ret = lookup_tree_block_ref(trans, root, path, bytenr, parent,
    1756             :                                             root_objectid);
    1757             :         } else {
    1758           0 :                 ret = lookup_extent_data_ref(trans, root, path, bytenr, parent,
    1759             :                                              root_objectid, owner, offset);
    1760             :         }
    1761           0 :         return ret;
    1762             : }
    1763             : 
    1764             : /*
    1765             :  * Helper to update or remove the inline back ref @iref in the extent item
                     :  * at path->slots[0].
                     :  *
                     :  * The item's total ref count is changed by @refs_to_mod and @extent_op
                     :  * (if non-NULL) is applied.  The ref's own count is then changed by
                     :  * @refs_to_mod as well: extent data refs and shared data refs carry an
                     :  * explicit count, every other type counts as exactly one reference and
                     :  * may only be dropped (@refs_to_mod must be -1).  If a count remains it
                     :  * is written back.  Otherwise *@last_ref is set to 1, the ref is cut out
                     :  * of the item and the item is truncated, so @last_ref must be a valid
                     :  * pointer whenever the count can reach zero.  The leaf is marked dirty
                     :  * in both cases.
    1766             :  */
    1767             : static noinline_for_stack
    1768        7795 : void update_inline_extent_backref(struct btrfs_root *root,
    1769             :                                   struct btrfs_path *path,
    1770             :                                   struct btrfs_extent_inline_ref *iref,
    1771             :                                   int refs_to_mod,
    1772             :                                   struct btrfs_delayed_extent_op *extent_op,
    1773             :                                   int *last_ref)
    1774             : {
    1775             :         struct extent_buffer *leaf;
    1776             :         struct btrfs_extent_item *ei;
    1777             :         struct btrfs_extent_data_ref *dref = NULL;
    1778             :         struct btrfs_shared_data_ref *sref = NULL;
    1779             :         unsigned long ptr;
    1780             :         unsigned long end;
    1781             :         u32 item_size;
    1782             :         int size;
    1783             :         int type;
    1784             :         u64 refs;
    1785             : 
    1786        7795 :         leaf = path->nodes[0];
    1787       15590 :         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
    1788             :         refs = btrfs_extent_refs(leaf, ei);
    1789        7795 :         WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0); /* NOTE(review): refs is u64, so the sum is unsigned and this only catches a result of exactly 0 */
    1790        7795 :         refs += refs_to_mod;
    1791             :         btrfs_set_extent_refs(leaf, ei, refs);
    1792        7795 :         if (extent_op)
    1793           0 :                 __run_delayed_extent_op(extent_op, leaf, ei);
    1794             : 
    1795        7795 :         type = btrfs_extent_inline_ref_type(leaf, iref);
    1796             : 
    1797        7795 :         if (type == BTRFS_EXTENT_DATA_REF_KEY) { /* from here on refs is the count of this one ref, not of the item */
    1798        5800 :                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
    1799        5800 :                 refs = btrfs_extent_data_ref_count(leaf, dref);
    1800        1995 :         } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
    1801        1333 :                 sref = (struct btrfs_shared_data_ref *)(iref + 1);
    1802        1333 :                 refs = btrfs_shared_data_ref_count(leaf, sref);
    1803             :         } else {
    1804             :                 refs = 1; /* no stored count for the remaining types */
    1805         662 :                 BUG_ON(refs_to_mod != -1);
    1806             :         }
    1807             : 
    1808        7795 :         BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod); /* refuse to drop more references than this ref holds */
    1809        7795 :         refs += refs_to_mod;
    1810             : 
    1811        7795 :         if (refs > 0) {
    1812        1148 :                 if (type == BTRFS_EXTENT_DATA_REF_KEY)
    1813        1148 :                         btrfs_set_extent_data_ref_count(leaf, dref, refs);
    1814             :                 else /* only shared data refs get here: the other types always end at 0 above */
    1815           0 :                         btrfs_set_shared_data_ref_count(leaf, sref, refs);
    1816             :         } else {
    1817        6647 :                 *last_ref = 1;
    1818        6647 :                 size =  btrfs_extent_inline_ref_size(type);
    1819        6647 :                 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
    1820        6647 :                 ptr = (unsigned long)iref;
    1821        6647 :                 end = (unsigned long)ei + item_size;
    1822        6647 :                 if (ptr + size < end) /* data follows the removed ref: pull it down over the hole */
    1823        4850 :                         memmove_extent_buffer(leaf, ptr, ptr + size,
    1824        4850 :                                               end - ptr - size);
    1825        6647 :                 item_size -= size;
    1826        6647 :                 btrfs_truncate_item(root, path, item_size, 1);
    1827             :         }
    1828        7795 :         btrfs_mark_buffer_dirty(leaf);
    1829        7795 : }
    1830             : /*
                     :  * Add @refs_to_add references as an inline back ref of the extent.
                     :  *
                     :  * lookup_inline_extent_backref() is called with its last argument 1.
                     :  * If a matching inline ref already exists (return 0) its count is raised
                     :  * in place; this is only accepted for owners at or above
                     :  * BTRFS_FIRST_FREE_OBJECTID (BUG_ON otherwise).  If none exists
                     :  * (-ENOENT) a new inline ref is written at the position the lookup left
                     :  * in iref and 0 is returned.  Any other lookup result is returned to the
                     :  * caller unchanged.
                     :  */
    1831             : static noinline_for_stack
    1832       17521 : int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
    1833             :                                  struct btrfs_root *root,
    1834             :                                  struct btrfs_path *path,
    1835             :                                  u64 bytenr, u64 num_bytes, u64 parent,
    1836             :                                  u64 root_objectid, u64 owner,
    1837             :                                  u64 offset, int refs_to_add,
    1838             :                                  struct btrfs_delayed_extent_op *extent_op)
    1839             : {
    1840             :         struct btrfs_extent_inline_ref *iref;
    1841             :         int ret;
    1842             : 
    1843       17521 :         ret = lookup_inline_extent_backref(trans, root, path, &iref,
    1844             :                                            bytenr, num_bytes, parent,
    1845             :                                            root_objectid, owner, offset, 1);
    1846       17521 :         if (ret == 0) {
    1847        1000 :                 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
    1848        1000 :                 update_inline_extent_backref(root, path, iref,
    1849             :                                              refs_to_add, extent_op, NULL); /* NULL last_ref: assumes refs_to_add > 0 so the ref is never removed -- TODO confirm */
    1850       16521 :         } else if (ret == -ENOENT) {
    1851       16521 :                 setup_inline_extent_backref(root, path, iref, parent,
    1852             :                                             root_objectid, owner, offset,
    1853             :                                             refs_to_add, extent_op);
    1854             :                 ret = 0;
    1855             :         }
    1856       17521 :         return ret;
    1857             : }
    1858             : /*
                     :  * Insert a back ref for the extent through the non-inline helpers.
                     :  *
                     :  * Owners below BTRFS_FIRST_FREE_OBJECTID get a tree block ref and
                     :  * @refs_to_add must be 1 for them (BUG_ON otherwise); all other owners
                     :  * get an extent data ref carrying @refs_to_add.  Returns the result of
                     :  * the helper that was called.
                     :  */
    1859           0 : static int insert_extent_backref(struct btrfs_trans_handle *trans,
    1860             :                                  struct btrfs_root *root,
    1861             :                                  struct btrfs_path *path,
    1862             :                                  u64 bytenr, u64 parent, u64 root_objectid,
    1863             :                                  u64 owner, u64 offset, int refs_to_add)
    1864             : {
    1865             :         int ret;
    1866           0 :         if (owner < BTRFS_FIRST_FREE_OBJECTID) {
    1867           0 :                 BUG_ON(refs_to_add != 1);
    1868           0 :                 ret = insert_tree_block_ref(trans, root, path, bytenr,
    1869             :                                             parent, root_objectid);
    1870             :         } else {
    1871           0 :                 ret = insert_extent_data_ref(trans, root, path, bytenr,
    1872             :                                              parent, root_objectid,
    1873             :                                              owner, offset, refs_to_add);
    1874             :         }
    1875           0 :         return ret;
    1876             : }
    1877             : /*
                     :  * Drop @refs_to_drop references from a back ref the caller has located.
                     :  *
                     :  * A non-NULL @iref means the ref is inline and is updated in place (the
                     :  * return value is then 0).  Without @iref, data extents go through
                     :  * remove_extent_data_ref(); for anything else *@last_ref is set to 1 and
                     :  * the item at @path is deleted.  Non-data extents may only drop exactly
                     :  * one reference (BUG_ON otherwise).
                     :  */
    1878        6795 : static int remove_extent_backref(struct btrfs_trans_handle *trans,
    1879             :                                  struct btrfs_root *root,
    1880             :                                  struct btrfs_path *path,
    1881             :                                  struct btrfs_extent_inline_ref *iref,
    1882             :                                  int refs_to_drop, int is_data, int *last_ref)
    1883             : {
    1884             :         int ret = 0;
    1885             : 
    1886        6795 :         BUG_ON(!is_data && refs_to_drop != 1);
    1887        6795 :         if (iref) {
    1888        6795 :                 update_inline_extent_backref(root, path, iref,
    1889             :                                              -refs_to_drop, NULL, last_ref); /* negative modifier, no extent_op */
    1890           0 :         } else if (is_data) {
    1891           0 :                 ret = remove_extent_data_ref(trans, root, path, refs_to_drop,
    1892             :                                              last_ref);
    1893             :         } else {
    1894           0 :                 *last_ref = 1;
    1895             :                 ret = btrfs_del_item(trans, root, path);
    1896             :         }
    1897        6795 :         return ret;
    1898             : }
    1899             : /*
                     :  * Thin wrapper around blkdev_issue_discard() using GFP_NOFS and flags 0.
                     :  * @start and @len are shifted right by 9 before the call; presumably
                     :  * they are byte values being converted to 512-byte sectors -- TODO
                     :  * confirm against blkdev_issue_discard().
                     :  */
    1900             : static int btrfs_issue_discard(struct block_device *bdev,
    1901             :                                 u64 start, u64 len)
    1902             : {
    1903           0 :         return blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_NOFS, 0);
    1904             : }
    1905             : /*
                     :  * Discard the device ranges backing [@bytenr, @bytenr + @num_bytes).
                     :  *
                     :  * The range is mapped with btrfs_map_block(REQ_DISCARD).  Every stripe
                     :  * whose device has can_discard set gets a discard request; the lengths
                     :  * of the stripes that succeeded are added up and stored in
                     :  * *@actual_bytes when @actual_bytes is non-NULL.  -EOPNOTSUPP from a
                     :  * device is ignored, any other error stops the loop and is returned.
                     :  * A final -EOPNOTSUPP (which at that point can only come from the
                     :  * mapping call) is turned into 0 as well.
                     :  */
    1906           0 : static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
    1907             :                                 u64 num_bytes, u64 *actual_bytes)
    1908             : {
    1909             :         int ret;
    1910             :         u64 discarded_bytes = 0;
    1911           0 :         struct btrfs_bio *bbio = NULL;
    1912             : 
    1913             : 
    1914             :         /* Tell the block device(s) that the sectors can be discarded */
    1915           0 :         ret = btrfs_map_block(root->fs_info, REQ_DISCARD,
    1916             :                               bytenr, &num_bytes, &bbio, 0);
    1917             :         /* Error condition is -ENOMEM */
    1918           0 :         if (!ret) {
    1919           0 :                 struct btrfs_bio_stripe *stripe = bbio->stripes;
    1920             :                 int i;
    1921             : 
    1922             : 
    1923           0 :                 for (i = 0; i < bbio->num_stripes; i++, stripe++) {
    1924           0 :                         if (!stripe->dev->can_discard)
    1925           0 :                                 continue;
    1926             : 
    1927           0 :                         ret = btrfs_issue_discard(stripe->dev->bdev,
    1928             :                                                   stripe->physical,
    1929             :                                                   stripe->length);
    1930           0 :                         if (!ret)
    1931           0 :                                 discarded_bytes += stripe->length;
    1932           0 :                         else if (ret != -EOPNOTSUPP)
    1933             :                                 break; /* Logic errors or -ENOMEM, or -EIO but I don't know how that could happen JDM */
    1934             : 
    1935             :                         /*
    1936             :                          * Just in case we get back EOPNOTSUPP for some reason,
    1937             :                          * just ignore the return value so we don't screw up
    1938             :                          * people calling discard_extent.
    1939             :                          */
    1940             :                         ret = 0;
    1941             :                 }
    1942           0 :                 kfree(bbio); /* reached on the break path too, so bbio is freed on every exit from the loop */
    1943             :         }
    1944             : 
    1945           0 :         if (actual_bytes)
    1946           0 :                 *actual_bytes = discarded_bytes; /* stays 0 when the mapping failed */
    1947             : 
    1948             : 
    1949           0 :         if (ret == -EOPNOTSUPP)
    1950             :                 ret = 0;
    1951           0 :         return ret;
    1952             : }
    1953             : 
    1954             : /*
                     :  * Queue a delayed ref with action BTRFS_ADD_DELAYED_REF for the extent
                     :  * [@bytenr, @bytenr + @num_bytes).
                     :  *
                     :  * Owners below BTRFS_FIRST_FREE_OBJECTID are queued as tree refs (the
                     :  * owner is passed on as an int), all others as data refs together with
                     :  * @offset.  A tree-ref owner combined with BTRFS_TREE_LOG_OBJECTID as
                     :  * the root is a BUG.  Returns the result of the queueing helper.
                     :  *
                     :  * Can return -ENOMEM
                     :  */
    1955       21718 : int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
    1956             :                          struct btrfs_root *root,
    1957             :                          u64 bytenr, u64 num_bytes, u64 parent,
    1958             :                          u64 root_objectid, u64 owner, u64 offset,
    1959             :                          int no_quota)
    1960             : {
    1961             :         int ret;
    1962       21718 :         struct btrfs_fs_info *fs_info = root->fs_info;
    1963             : 
    1964       21718 :         BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID &&
    1965             :                root_objectid == BTRFS_TREE_LOG_OBJECTID);
    1966             : 
    1967       21718 :         if (owner < BTRFS_FIRST_FREE_OBJECTID) {
    1968        3458 :                 ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
    1969             :                                         num_bytes,
    1970             :                                         parent, root_objectid, (int)owner,
    1971             :                                         BTRFS_ADD_DELAYED_REF, NULL, no_quota);
    1972             :         } else {
    1973       18260 :                 ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
    1974             :                                         num_bytes,
    1975             :                                         parent, root_objectid, owner, offset,
    1976             :                                         BTRFS_ADD_DELAYED_REF, NULL, no_quota);
    1977             :         }
    1978       21718 :         return ret;
    1979             : }
    1980             : /*
                     :  * Add @refs_to_add references to the extent item for @bytenr.
                     :  *
                     :  * An inline back ref is tried first.  If that succeeds, the function is
                     :  * done unless qgroup accounting is wanted, in which case the operation
                     :  * is recorded as "exclusive" or, when the item now holds more than
                     :  * @refs_to_add references, as "shared".  If the inline attempt returns
                     :  * -EAGAIN, the item's ref count is bumped directly, qgroup accounting
                     :  * is recorded if wanted, and a separate back ref item is inserted; a
                     :  * failure of that insert aborts the transaction.
                     :  *
                     :  * @no_quota is forced to 1 when @root_objectid is not an fs tree or
                     :  * quotas are disabled.  Returns 0 on success, -ENOMEM when no path
                     :  * could be allocated, otherwise the error of the failing helper.
                     :  */
    1981       17521 : static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
    1982             :                                   struct btrfs_root *root,
    1983             :                                   u64 bytenr, u64 num_bytes,
    1984             :                                   u64 parent, u64 root_objectid,
    1985             :                                   u64 owner, u64 offset, int refs_to_add,
    1986             :                                   int no_quota,
    1987             :                                   struct btrfs_delayed_extent_op *extent_op)
    1988             : {
    1989       17521 :         struct btrfs_fs_info *fs_info = root->fs_info;
    1990             :         struct btrfs_path *path;
    1991             :         struct extent_buffer *leaf;
    1992             :         struct btrfs_extent_item *item;
    1993             :         struct btrfs_key key;
    1994             :         u64 refs;
    1995             :         int ret;
    1996             :         enum btrfs_qgroup_operation_type type = BTRFS_QGROUP_OPER_ADD_EXCL;
    1997             : 
    1998       17521 :         path = btrfs_alloc_path();
    1999       17521 :         if (!path)
    2000             :                 return -ENOMEM;
    2001             : 
    2002       17521 :         if (!is_fstree(root_objectid) || !root->fs_info->quota_enabled)
    2003             :                 no_quota = 1;
    2004             : 
    2005       17521 :         path->reada = 1;
    2006       17521 :         path->leave_spinning = 1;
    2007             :         /* this will setup the path even if it fails to insert the back ref */
    2008       17521 :         ret = insert_inline_extent_backref(trans, fs_info->extent_root, path,
    2009             :                                            bytenr, num_bytes, parent,
    2010             :                                            root_objectid, owner, offset,
    2011             :                                            refs_to_add, extent_op);
    2012       17521 :         if ((ret < 0 && ret != -EAGAIN) || (!ret && no_quota)) /* hard error, or success with no accounting to do */
    2013             :                 goto out;
    2014             :         /*
    2015             :          * Ok we were able to insert an inline extent ref and it appears to be
    2016             :          * a new reference, deal with the qgroup accounting.
    2017             :          */
    2018           0 :         if (!ret && !no_quota) {
    2019             :                 ASSERT(root->fs_info->quota_enabled);
    2020           0 :                 leaf = path->nodes[0];
    2021           0 :                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
    2022           0 :                 item = btrfs_item_ptr(leaf, path->slots[0],
    2023             :                                       struct btrfs_extent_item);
    2024           0 :                 if (btrfs_extent_refs(leaf, item) > (u64)refs_to_add) /* more refs than were just added: someone else holds one too */
    2025             :                         type = BTRFS_QGROUP_OPER_ADD_SHARED;
    2026           0 :                 btrfs_release_path(path);
    2027             : 
    2028           0 :                 ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
    2029             :                                               bytenr, num_bytes, type, 0);
    2030           0 :                 goto out;
    2031             :         }
    2032             : 
    2033             :         /*
    2034             :          * Ok we had -EAGAIN which means we didn't have space to insert an
    2035             :          * inline extent ref, so just update the reference count and add a
    2036             :          * normal backref.
    2037             :          */
    2038           0 :         leaf = path->nodes[0];
    2039           0 :         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
    2040           0 :         item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
    2041             :         refs = btrfs_extent_refs(leaf, item);
    2042           0 :         if (refs) /* checked before the increment: the extent was already referenced */
    2043             :                 type = BTRFS_QGROUP_OPER_ADD_SHARED;
    2044           0 :         btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
    2045           0 :         if (extent_op)
    2046           0 :                 __run_delayed_extent_op(extent_op, leaf, item);
    2047             : 
    2048           0 :         btrfs_mark_buffer_dirty(leaf);
    2049           0 :         btrfs_release_path(path);
    2050             : 
    2051           0 :         if (!no_quota) {
    2052           0 :                 ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
    2053             :                                               bytenr, num_bytes, type, 0);
    2054           0 :                 if (ret)
    2055             :                         goto out;
    2056             :         }
    2057             : 
    2058           0 :         path->reada = 1; /* the released path is reused for the second insert */
    2059           0 :         path->leave_spinning = 1;
    2060             :         /* now insert the actual backref */
    2061           0 :         ret = insert_extent_backref(trans, root->fs_info->extent_root,
    2062             :                                     path, bytenr, parent, root_objectid,
    2063             :                                     owner, offset, refs_to_add);
    2064           0 :         if (ret)
    2065           0 :                 btrfs_abort_transaction(trans, root, ret);
    2066             : out:
    2067       17521 :         btrfs_free_path(path);
    2068       17521 :         return ret;
    2069             : }
    2070             : /*
                     :  * Execute one delayed data ref @node.
                     :  *
                     :  * A key describing the extent (objectid = bytenr, offset = num_bytes,
                     :  * type = BTRFS_EXTENT_ITEM_KEY) is built and the node's action selects
                     :  * the operation:
                     :  *   - BTRFS_ADD_DELAYED_REF with @insert_reserved set:
                     :  *     alloc_reserved_file_extent(), with the flags requested by
                     :  *     @extent_op if there is one;
                     :  *   - BTRFS_ADD_DELAYED_REF otherwise: __btrfs_inc_extent_ref();
                     :  *   - BTRFS_DROP_DELAYED_REF: __btrfs_free_extent();
                     :  *   - anything else: BUG().
                     :  * The ref's parent is only used for shared data refs; for every other
                     :  * node type 0 is passed.  Returns the result of the selected operation.
                     :  */
    2071       88601 : static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
    2072             :                                 struct btrfs_root *root,
    2073             :                                 struct btrfs_delayed_ref_node *node,
    2074             :                                 struct btrfs_delayed_extent_op *extent_op,
    2075             :                                 int insert_reserved)
    2076             : {
    2077             :         int ret = 0;
    2078             :         struct btrfs_delayed_data_ref *ref;
    2079             :         struct btrfs_key ins;
    2080             :         u64 parent = 0;
    2081             :         u64 ref_root = 0;
    2082             :         u64 flags = 0;
    2083             : 
    2084       88601 :         ins.objectid = node->bytenr;
    2085       88601 :         ins.offset = node->num_bytes;
    2086       88601 :         ins.type = BTRFS_EXTENT_ITEM_KEY;
    2087             : 
    2088             :         ref = btrfs_delayed_node_to_data_ref(node);
    2089       88601 :         trace_run_delayed_data_ref(node, ref, node->action);
    2090             : 
    2091       88601 :         if (node->type == BTRFS_SHARED_DATA_REF_KEY)
    2092       15513 :                 parent = ref->parent;
    2093       88601 :         ref_root = ref->root;
    2094             : 
    2095       88601 :         if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
    2096       52711 :                 if (extent_op)
    2097           0 :                         flags |= extent_op->flags_to_set;
    2098       52711 :                 ret = alloc_reserved_file_extent(trans, root,
    2099             :                                                  parent, ref_root, flags,
    2100             :                                                  ref->objectid, ref->offset,
    2101             :                                                  &ins, node->ref_mod);
    2102       35890 :         } else if (node->action == BTRFS_ADD_DELAYED_REF) {
    2103       14109 :                 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
    2104             :                                              node->num_bytes, parent,
    2105             :                                              ref_root, ref->objectid,
    2106             :                                              ref->offset, node->ref_mod,
    2107       14109 :                                              node->no_quota, extent_op);
    2108       21781 :         } else if (node->action == BTRFS_DROP_DELAYED_REF) {
    2109       21781 :                 ret = __btrfs_free_extent(trans, root, node->bytenr,
    2110             :                                           node->num_bytes, parent,
    2111             :                                           ref_root, ref->objectid,
    2112             :                                           ref->offset, node->ref_mod,
    2113       21781 :                                           extent_op, node->no_quota);
    2114             :         } else {
    2115           0 :                 BUG();
    2116             :         }
    2117       88601 :         return ret;
    2118             : }
    2119             : 
    2120         412 : static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
    2121             :                                     struct extent_buffer *leaf,
    2122             :                                     struct btrfs_extent_item *ei)
    2123             : {
    2124             :         u64 flags = btrfs_extent_flags(leaf, ei);
    2125         412 :         if (extent_op->update_flags) {
    2126         412 :                 flags |= extent_op->flags_to_set;
    2127             :                 btrfs_set_extent_flags(leaf, ei, flags);
    2128             :         }
    2129             : 
    2130         412 :         if (extent_op->update_key) {
    2131             :                 struct btrfs_tree_block_info *bi;
    2132           0 :                 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
    2133           0 :                 bi = (struct btrfs_tree_block_info *)(ei + 1);
    2134           0 :                 btrfs_set_tree_block_key(leaf, bi, &extent_op->key);
    2135             :         }
    2136         412 : }
    2137             : 
    2138           9 : static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
    2139             :                                  struct btrfs_root *root,
    2140             :                                  struct btrfs_delayed_ref_node *node,
    2141             :                                  struct btrfs_delayed_extent_op *extent_op)
    2142             : {
    2143             :         struct btrfs_key key;
    2144             :         struct btrfs_path *path;
    2145             :         struct btrfs_extent_item *ei;
    2146             :         struct extent_buffer *leaf;
    2147             :         u32 item_size;
    2148             :         int ret;
    2149             :         int err = 0;
    2150           9 :         int metadata = !extent_op->is_data;
    2151             : 
    2152           9 :         if (trans->aborted)
    2153             :                 return 0;
    2154             : 
    2155          18 :         if (metadata && !btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
    2156             :                 metadata = 0;
    2157             : 
    2158           9 :         path = btrfs_alloc_path();
    2159           9 :         if (!path)
    2160             :                 return -ENOMEM;
    2161             : 
    2162           9 :         key.objectid = node->bytenr;
    2163             : 
    2164           9 :         if (metadata) {
    2165           0 :                 key.type = BTRFS_METADATA_ITEM_KEY;
    2166           0 :                 key.offset = extent_op->level;
    2167             :         } else {
    2168           9 :                 key.type = BTRFS_EXTENT_ITEM_KEY;
    2169           9 :                 key.offset = node->num_bytes;
    2170             :         }
    2171             : 
    2172             : again:
    2173           9 :         path->reada = 1;
    2174           9 :         path->leave_spinning = 1;
    2175           9 :         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key,
    2176             :                                 path, 0, 1);
    2177           9 :         if (ret < 0) {
    2178             :                 err = ret;
    2179             :                 goto out;
    2180             :         }
    2181           9 :         if (ret > 0) {
    2182           0 :                 if (metadata) {
    2183           0 :                         if (path->slots[0] > 0) {
    2184           0 :                                 path->slots[0]--;
    2185           0 :                                 btrfs_item_key_to_cpu(path->nodes[0], &key,
    2186             :                                                       path->slots[0]);
    2187           0 :                                 if (key.objectid == node->bytenr &&
    2188           0 :                                     key.type == BTRFS_EXTENT_ITEM_KEY &&
    2189           0 :                                     key.offset == node->num_bytes)
    2190             :                                         ret = 0;
    2191             :                         }
    2192           0 :                         if (ret > 0) {
    2193           0 :                                 btrfs_release_path(path);
    2194             :                                 metadata = 0;
    2195             : 
    2196           0 :                                 key.objectid = node->bytenr;
    2197           0 :                                 key.offset = node->num_bytes;
    2198           0 :                                 key.type = BTRFS_EXTENT_ITEM_KEY;
    2199             :                                 goto again;
    2200             :                         }
    2201             :                 } else {
    2202             :                         err = -EIO;
    2203             :                         goto out;
    2204             :                 }
    2205             :         }
    2206             : 
    2207           9 :         leaf = path->nodes[0];
    2208           9 :         item_size = btrfs_item_size_nr(leaf, path->slots[0]);
    2209             : #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
    2210           9 :         if (item_size < sizeof(*ei)) {
    2211           0 :                 ret = convert_extent_item_v0(trans, root->fs_info->extent_root,
    2212             :                                              path, (u64)-1, 0);
    2213           0 :                 if (ret < 0) {
    2214             :                         err = ret;
    2215             :                         goto out;
    2216             :                 }
    2217           0 :                 leaf = path->nodes[0];
    2218           0 :                 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
    2219             :         }
    2220             : #endif
    2221           9 :         BUG_ON(item_size < sizeof(*ei));
    2222          18 :         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
    2223           9 :         __run_delayed_extent_op(extent_op, leaf, ei);
    2224             : 
    2225           9 :         btrfs_mark_buffer_dirty(leaf);
    2226             : out:
    2227           9 :         btrfs_free_path(path);
    2228             :         return err;
    2229             : }
    2230             : 
    2231      106070 : static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
    2232             :                                 struct btrfs_root *root,
    2233             :                                 struct btrfs_delayed_ref_node *node,
    2234             :                                 struct btrfs_delayed_extent_op *extent_op,
    2235             :                                 int insert_reserved)
    2236             : {
    2237             :         int ret = 0;
    2238             :         struct btrfs_delayed_tree_ref *ref;
    2239             :         struct btrfs_key ins;
    2240             :         u64 parent = 0;
    2241             :         u64 ref_root = 0;
    2242      106070 :         bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
    2243             :                                                  SKINNY_METADATA);
    2244             : 
    2245             :         ref = btrfs_delayed_node_to_tree_ref(node);
    2246      106070 :         trace_run_delayed_tree_ref(node, ref, node->action);
    2247             : 
    2248      106070 :         if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
    2249        1312 :                 parent = ref->parent;
    2250      106070 :         ref_root = ref->root;
    2251             : 
    2252      106070 :         ins.objectid = node->bytenr;
    2253      106070 :         if (skinny_metadata) {
    2254           0 :                 ins.offset = ref->level;
    2255           0 :                 ins.type = BTRFS_METADATA_ITEM_KEY;
    2256             :         } else {
    2257      106070 :                 ins.offset = node->num_bytes;
    2258      106070 :                 ins.type = BTRFS_EXTENT_ITEM_KEY;
    2259             :         }
    2260             : 
    2261      106070 :         BUG_ON(node->ref_mod != 1);
    2262      106070 :         if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
    2263       53373 :                 BUG_ON(!extent_op || !extent_op->update_flags);
    2264       53373 :                 ret = alloc_reserved_tree_block(trans, root,
    2265             :                                                 parent, ref_root,
    2266             :                                                 extent_op->flags_to_set,
    2267             :                                                 &extent_op->key,
    2268             :                                                 ref->level, &ins,
    2269       53373 :                                                 node->no_quota);
    2270       52697 :         } else if (node->action == BTRFS_ADD_DELAYED_REF) {
    2271        6824 :                 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
    2272             :                                              node->num_bytes, parent, ref_root,
    2273        6824 :                                              ref->level, 0, 1, node->no_quota,
    2274             :                                              extent_op);
    2275       49285 :         } else if (node->action == BTRFS_DROP_DELAYED_REF) {
    2276       98570 :                 ret = __btrfs_free_extent(trans, root, node->bytenr,
    2277             :                                           node->num_bytes, parent, ref_root,
    2278       49285 :                                           ref->level, 0, 1, extent_op,
    2279       49285 :                                           node->no_quota);
    2280             :         } else {
    2281           0 :                 BUG();
    2282             :         }
    2283      106070 :         return ret;
    2284             : }
    2285             : 
    2286             : /* helper function to actually process a single delayed ref entry */
    2287      386435 : static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
    2288             :                                struct btrfs_root *root,
    2289             :                                struct btrfs_delayed_ref_node *node,
    2290             :                                struct btrfs_delayed_extent_op *extent_op,
    2291             :                                int insert_reserved)
    2292             : {
    2293             :         int ret = 0;
    2294             : 
    2295      386435 :         if (trans->aborted) {
    2296           0 :                 if (insert_reserved)
    2297           0 :                         btrfs_pin_extent(root, node->bytenr,
    2298             :                                          node->num_bytes, 1);
    2299             :                 return 0;
    2300             :         }
    2301             : 
    2302      386435 :         if (btrfs_delayed_ref_is_head(node)) {
    2303             :                 struct btrfs_delayed_ref_head *head;
    2304             :                 /*
    2305             :                  * we've hit the end of the chain and we were supposed
    2306             :                  * to insert this extent into the tree.  But, it got
    2307             :                  * deleted before we ever needed to insert it, so all
    2308             :                  * we have to do is clean up the accounting
    2309             :                  */
    2310      191764 :                 BUG_ON(extent_op);
    2311             :                 head = btrfs_delayed_node_to_head(node);
    2312      191764 :                 trace_run_delayed_ref_head(node, head, node->action);
    2313             : 
    2314      191764 :                 if (insert_reserved) {
    2315         889 :                         btrfs_pin_extent(root, node->bytenr,
    2316             :                                          node->num_bytes, 1);
    2317         889 :                         if (head->is_data) {
    2318         620 :                                 ret = btrfs_del_csums(trans, root,
    2319             :                                                       node->bytenr,
    2320             :                                                       node->num_bytes);
    2321             :                         }
    2322             :                 }
    2323      191764 :                 return ret;
    2324             :         }
    2325             : 
    2326      194671 :         if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
    2327             :             node->type == BTRFS_SHARED_BLOCK_REF_KEY)
    2328      106070 :                 ret = run_delayed_tree_ref(trans, root, node, extent_op,
    2329             :                                            insert_reserved);
    2330       88601 :         else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
    2331             :                  node->type == BTRFS_SHARED_DATA_REF_KEY)
    2332       88601 :                 ret = run_delayed_data_ref(trans, root, node, extent_op,
    2333             :                                            insert_reserved);
    2334             :         else
    2335           0 :                 BUG();
    2336      194671 :         return ret;
    2337             : }
    2338             : 
    2339             : static noinline struct btrfs_delayed_ref_node *
    2340      386450 : select_delayed_ref(struct btrfs_delayed_ref_head *head)
    2341             : {
    2342             :         struct rb_node *node;
    2343             :         struct btrfs_delayed_ref_node *ref, *last = NULL;;
    2344             : 
    2345             :         /*
    2346             :          * select delayed ref of type BTRFS_ADD_DELAYED_REF first.
    2347             :          * this prevents ref count from going down to zero when
    2348             :          * there are still pending delayed refs.
    2349             :          */
    2350      386450 :         node = rb_first(&head->ref_root);
    2351      849080 :         while (node) {
    2352             :                 ref = rb_entry(node, struct btrfs_delayed_ref_node,
    2353             :                                 rb_node);
    2354      199788 :                 if (ref->action == BTRFS_ADD_DELAYED_REF)
    2355             :                         return ref;
    2356       76180 :                 else if (last == NULL)
    2357             :                         last = ref;
    2358       76180 :                 node = rb_next(node);
    2359             :         }
    2360             :         return last;
    2361             : }
    2362             : 
    2363             : /*
    2364             :  * Returns 0 on success or if called with an already aborted transaction.
    2365             :  * Returns -ENOMEM or -EIO on failure and will abort the transaction.
    2366             :  */
    2367       41040 : static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
    2368             :                                              struct btrfs_root *root,
    2369             :                                              unsigned long nr)
    2370             : {
    2371             :         struct btrfs_delayed_ref_root *delayed_refs;
    2372             :         struct btrfs_delayed_ref_node *ref;
    2373             :         struct btrfs_delayed_ref_head *locked_ref = NULL;
    2374             :         struct btrfs_delayed_extent_op *extent_op;
    2375       41040 :         struct btrfs_fs_info *fs_info = root->fs_info;
    2376       41040 :         ktime_t start = ktime_get();
    2377             :         int ret;
    2378             :         unsigned long count = 0;
    2379             :         unsigned long actual_count = 0;
    2380             :         int must_insert_reserved = 0;
    2381             : 
    2382       41041 :         delayed_refs = &trans->transaction->delayed_refs;
    2383             :         while (1) {
    2384      427488 :                 if (!locked_ref) {
    2385      232811 :                         if (count >= nr)
    2386             :                                 break;
    2387             : 
    2388             :                         spin_lock(&delayed_refs->lock);
    2389      228327 :                         locked_ref = btrfs_select_ref_head(trans);
    2390      228327 :                         if (!locked_ref) {
    2391             :                                 spin_unlock(&delayed_refs->lock);
    2392             :                                 break;
    2393             :                         }
    2394             : 
    2395             :                         /* grab the lock that says we are going to process
    2396             :                          * all the refs for this head */
    2397      191772 :                         ret = btrfs_delayed_ref_lock(trans, locked_ref);
    2398             :                         spin_unlock(&delayed_refs->lock);
    2399             :                         /*
    2400             :                          * we may have dropped the spin lock to get the head
    2401             :                          * mutex lock, and that might have given someone else
    2402             :                          * time to free the head.  If that's true, it has been
    2403             :                          * removed from our list and we can move on.
    2404             :                          */
    2405      191772 :                         if (ret == -EAGAIN) {
    2406             :                                 locked_ref = NULL;
    2407           0 :                                 count++;
    2408           0 :                                 continue;
    2409             :                         }
    2410             :                 }
    2411             : 
    2412             :                 /*
    2413             :                  * We need to try and merge add/drops of the same ref since we
    2414             :                  * can run into issues with relocate dropping the implicit ref
    2415             :                  * and then it being added back again before the drop can
    2416             :                  * finish.  If we merged anything we need to re-loop so we can
    2417             :                  * get a good ref.
    2418             :                  */
    2419             :                 spin_lock(&locked_ref->lock);
    2420      386444 :                 btrfs_merge_delayed_refs(trans, fs_info, delayed_refs,
    2421             :                                          locked_ref);
    2422             : 
    2423             :                 /*
    2424             :                  * locked_ref is the head node, so we have to go one
    2425             :                  * node back for any delayed ref updates
    2426             :                  */
    2427      386450 :                 ref = select_delayed_ref(locked_ref);
    2428             : 
    2429      401097 :                 if (ref && ref->seq &&
    2430       14648 :                     btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) {
    2431             :                         spin_unlock(&locked_ref->lock);
    2432             :                         btrfs_delayed_ref_unlock(locked_ref);
    2433             :                         spin_lock(&delayed_refs->lock);
    2434           8 :                         locked_ref->processing = 0;
    2435           8 :                         delayed_refs->num_heads_ready++;
    2436             :                         spin_unlock(&delayed_refs->lock);
    2437             :                         locked_ref = NULL;
    2438           8 :                         cond_resched();
    2439           8 :                         count++;
    2440           8 :                         continue;
    2441             :                 }
    2442             : 
    2443             :                 /*
    2444             :                  * record the must insert reserved flag before we
    2445             :                  * drop the spin lock.
    2446             :                  */
    2447      386441 :                 must_insert_reserved = locked_ref->must_insert_reserved;
    2448      386441 :                 locked_ref->must_insert_reserved = 0;
    2449             : 
    2450      386441 :                 extent_op = locked_ref->extent_op;
    2451      386441 :                 locked_ref->extent_op = NULL;
    2452             : 
    2453      386441 :                 if (!ref) {
    2454             : 
    2455             : 
    2456             :                         /* All delayed refs have been processed. Go ahead
    2457             :                          * and send the head node to run_one_delayed_ref,
    2458             :                          * so that any accounting fixes can happen
    2459             :                          */
    2460      191769 :                         ref = &locked_ref->node;
    2461             : 
    2462      191769 :                         if (extent_op && must_insert_reserved) {
    2463             :                                 btrfs_free_delayed_extent_op(extent_op);
    2464             :                                 extent_op = NULL;
    2465             :                         }
    2466             : 
    2467      191769 :                         if (extent_op) {
    2468             :                                 spin_unlock(&locked_ref->lock);
    2469           9 :                                 ret = run_delayed_extent_op(trans, root,
    2470             :                                                             ref, extent_op);
    2471             :                                 btrfs_free_delayed_extent_op(extent_op);
    2472             : 
    2473           9 :                                 if (ret) {
    2474             :                                         /*
    2475             :                                          * Need to reset must_insert_reserved if
    2476             :                                          * there was an error so the abort stuff
    2477             :                                          * can clean up the reserved space
    2478             :                                          * properly.
    2479             :                                          */
    2480           0 :                                         if (must_insert_reserved)
    2481           0 :                                                 locked_ref->must_insert_reserved = 1;
    2482           0 :                                         locked_ref->processing = 0;
    2483             :                                         btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
    2484             :                                         btrfs_delayed_ref_unlock(locked_ref);
    2485           0 :                                         return ret;
    2486             :                                 }
    2487           9 :                                 continue;
    2488             :                         }
    2489             : 
    2490             :                         /*
    2491             :                          * Need to drop our head ref lock and re-acquire the
    2492             :                          * delayed ref lock and then re-check to make sure
    2493             :                          * nobody got added.
    2494             :                          */
    2495             :                         spin_unlock(&locked_ref->lock);
    2496             :                         spin_lock(&delayed_refs->lock);
    2497             :                         spin_lock(&locked_ref->lock);
    2498      383528 :                         if (rb_first(&locked_ref->ref_root) ||
    2499      191764 :                             locked_ref->extent_op) {
    2500             :                                 spin_unlock(&locked_ref->lock);
    2501             :                                 spin_unlock(&delayed_refs->lock);
    2502           0 :                                 continue;
    2503             :                         }
    2504      191764 :                         ref->in_tree = 0;
    2505      191764 :                         delayed_refs->num_heads--;
    2506      191764 :                         rb_erase(&locked_ref->href_node,
    2507             :                                  &delayed_refs->href_root);
    2508             :                         spin_unlock(&delayed_refs->lock);
    2509             :                 } else {
    2510      194672 :                         actual_count++;
    2511      194672 :                         ref->in_tree = 0;
    2512      194672 :                         rb_erase(&ref->rb_node, &locked_ref->ref_root);
    2513             :                 }
    2514      386436 :                 atomic_dec(&delayed_refs->num_entries);
    2515             : 
    2516      386431 :                 if (!btrfs_delayed_ref_is_head(ref)) {
    2517             :                         /*
    2518             :                          * when we play the delayed ref, also correct the
    2519             :                          * ref_mod on head
    2520             :                          */
    2521      194671 :                         switch (ref->action) {
    2522             :                         case BTRFS_ADD_DELAYED_REF:
    2523             :                         case BTRFS_ADD_DELAYED_EXTENT:
    2524      123605 :                                 locked_ref->node.ref_mod -= ref->ref_mod;
    2525      123605 :                                 break;
    2526             :                         case BTRFS_DROP_DELAYED_REF:
    2527       71066 :                                 locked_ref->node.ref_mod += ref->ref_mod;
    2528       71066 :                                 break;
    2529             :                         default:
    2530           0 :                                 WARN_ON(1);
    2531             :                         }
    2532             :                 }
    2533             :                 spin_unlock(&locked_ref->lock);
    2534             : 
    2535      386427 :                 ret = run_one_delayed_ref(trans, root, ref, extent_op,
    2536             :                                           must_insert_reserved);
    2537             : 
    2538             :                 btrfs_free_delayed_extent_op(extent_op);
    2539      386430 :                 if (ret) {
    2540           0 :                         locked_ref->processing = 0;
    2541             :                         btrfs_delayed_ref_unlock(locked_ref);
    2542           0 :                         btrfs_put_delayed_ref(ref);
    2543             :                         btrfs_debug(fs_info, "run_one_delayed_ref returned %d", ret);
    2544           0 :                         return ret;
    2545             :                 }
    2546             : 
    2547             :                 /*
    2548             :                  * If this node is a head, that means all the refs in this head
    2549             :                  * have been dealt with, and we will pick the next head to deal
    2550             :                  * with, so we must unlock the head and drop it from the cluster
    2551             :                  * list before we release it.
    2552             :                  */
    2553      386430 :                 if (btrfs_delayed_ref_is_head(ref)) {
    2554             :                         btrfs_delayed_ref_unlock(locked_ref);
    2555             :                         locked_ref = NULL;
    2556             :                 }
    2557      386429 :                 btrfs_put_delayed_ref(ref);
    2558      386432 :                 count++;
    2559      386432 :                 cond_resched();
    2560             :         }
    2561             : 
    2562             :         /*
    2563             :          * We don't want to include ref heads since we can have empty ref heads
    2564             :          * and those will drastically skew our runtime down since we just do
    2565             :          * accounting, no actual extent tree updates.
    2566             :          */
    2567       41041 :         if (actual_count > 0) {
    2568       10094 :                 u64 runtime = ktime_to_ns(ktime_sub(ktime_get(), start));
    2569             :                 u64 avg;
    2570             : 
    2571             :                 /*
    2572             :                  * We weigh the current average higher than our current runtime
    2573             :                  * to avoid large swings in the average.
    2574             :                  */
    2575             :                 spin_lock(&delayed_refs->lock);
    2576       10094 :                 avg = fs_info->avg_delayed_ref_runtime * 3 + runtime;
    2577             :                 avg = div64_u64(avg, 4);
    2578       10094 :                 fs_info->avg_delayed_ref_runtime = avg;
    2579             :                 spin_unlock(&delayed_refs->lock);
    2580             :         }
    2581             :         return 0;
    2582             : }
    2583             : 
    2584             : #ifdef SCRAMBLE_DELAYED_REFS
    2585             : /*
    2586             :  * Normally delayed refs get processed in ascending bytenr order. This
    2587             :  * correlates in most cases to the order added. To expose dependencies on this
    2588             :  * order, we start to process the tree in the middle instead of the beginning.
    2589             :  */
    2590             : static u64 find_middle(struct rb_root *root)
    2591             : {
    2592             :         struct rb_node *n = root->rb_node;
    2593             :         struct btrfs_delayed_ref_node *entry;
    2594             :         int alt = 1;
    2595             :         u64 middle;
    2596             :         u64 first = 0, last = 0;
    2597             : 
    2598             :         n = rb_first(root);
    2599             :         if (n) {
    2600             :                 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
    2601             :                 first = entry->bytenr;
    2602             :         }
    2603             :         n = rb_last(root);
    2604             :         if (n) {
    2605             :                 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
    2606             :                 last = entry->bytenr;
    2607             :         }
    2608             :         n = root->rb_node;
    2609             : 
    2610             :         while (n) {
    2611             :                 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
    2612             :                 WARN_ON(!entry->in_tree);
    2613             : 
    2614             :                 middle = entry->bytenr;
    2615             : 
    2616             :                 if (alt)
    2617             :                         n = n->rb_left;
    2618             :                 else
    2619             :                         n = n->rb_right;
    2620             : 
    2621             :                 alt = 1 - alt;
    2622             :         }
    2623             :         return middle;
    2624             : }
    2625             : #endif
    2626             : 
    2627             : static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads)
    2628             : {
    2629             :         u64 num_bytes;
    2630             : 
    2631      173077 :         num_bytes = heads * (sizeof(struct btrfs_extent_item) +
    2632             :                              sizeof(struct btrfs_extent_inline_ref));
    2633      346154 :         if (!btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
    2634      173076 :                 num_bytes += heads * sizeof(struct btrfs_tree_block_info);
    2635             : 
    2636             :         /*
    2637             :          * We don't ever fill up leaves all the way so multiply by 2 just to be
    2638             :          * closer to what we're really going to want to use.
    2639             :          */
    2640      173077 :         return div64_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(root));
    2641             : }
    2642             : 
    2643      173077 : int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
    2644      346154 :                                        struct btrfs_root *root)
    2645             : {
    2646             :         struct btrfs_block_rsv *global_rsv;
    2647      173077 :         u64 num_heads = trans->transaction->delayed_refs.num_heads_ready;
    2648             :         u64 num_bytes;
    2649             :         int ret = 0;
    2650             : 
    2651             :         num_bytes = btrfs_calc_trans_metadata_size(root, 1);
    2652             :         num_heads = heads_to_leaves(root, num_heads);
    2653      173077 :         if (num_heads > 1)
    2654       45407 :                 num_bytes += (num_heads - 1) * root->leafsize;
    2655      173077 :         num_bytes <<= 1;
    2656             :         global_rsv = &root->fs_info->global_block_rsv;
    2657             : 
    2658             :         /*
    2659             :          * If we can't allocate any more chunks let's make sure we have _lots_ of
    2660             :          * wiggle room since running delayed refs can create more delayed refs.
    2661             :          */
    2662      173077 :         if (global_rsv->space_info->full)
    2663           0 :                 num_bytes <<= 1;
    2664             : 
    2665             :         spin_lock(&global_rsv->lock);
    2666      173095 :         if (global_rsv->reserved <= num_bytes)
    2667             :                 ret = 1;
    2668             :         spin_unlock(&global_rsv->lock);
    2669      173093 :         return ret;
    2670             : }
    2671             : 
    2672      175227 : int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
    2673             :                                        struct btrfs_root *root)
    2674             : {
    2675      175227 :         struct btrfs_fs_info *fs_info = root->fs_info;
    2676      175227 :         u64 num_entries =
    2677      175227 :                 atomic_read(&trans->transaction->delayed_refs.num_entries);
    2678             :         u64 avg_runtime;
    2679             :         u64 val;
    2680             : 
    2681      175227 :         smp_mb();
    2682      175239 :         avg_runtime = fs_info->avg_delayed_ref_runtime;
    2683      175239 :         val = num_entries * avg_runtime;
    2684      175239 :         if (num_entries * avg_runtime >= NSEC_PER_SEC)
    2685             :                 return 1;
    2686      175207 :         if (val >= NSEC_PER_SEC / 2)
    2687             :                 return 2;
    2688             : 
    2689      173087 :         return btrfs_check_space_for_delayed_refs(trans, root);
    2690             : }
    2691             : 
    2692             : struct async_delayed_refs {
    2693             :         struct btrfs_root *root;
    2694             :         int count;
    2695             :         int error;
    2696             :         int sync;
    2697             :         struct completion wait;
    2698             :         struct btrfs_work work;
    2699             : };
    2700             : 
    2701        2152 : static void delayed_ref_async_start(struct btrfs_work *work)
    2702             : {
    2703             :         struct async_delayed_refs *async;
    2704             :         struct btrfs_trans_handle *trans;
    2705             :         int ret;
    2706             : 
    2707        2152 :         async = container_of(work, struct async_delayed_refs, work);
    2708             : 
    2709        2152 :         trans = btrfs_join_transaction(async->root);
    2710        2152 :         if (IS_ERR(trans)) {
    2711           0 :                 async->error = PTR_ERR(trans);
    2712           0 :                 goto done;
    2713             :         }
    2714             : 
    2715             :         /*
    2716             :          * trans->sync means that when we call btrfs_end_transaction, we won't
    2717             :          * wait on delayed refs
    2718             :          */
    2719        2152 :         trans->sync = true;
    2720        2152 :         ret = btrfs_run_delayed_refs(trans, async->root, async->count);
    2721        2152 :         if (ret)
    2722           0 :                 async->error = ret;
    2723             : 
    2724        2152 :         ret = btrfs_end_transaction(trans, async->root);
    2725        2152 :         if (ret && !async->error)
    2726           0 :                 async->error = ret;
    2727             : done:
    2728        2152 :         if (async->sync)
    2729          31 :                 complete(&async->wait);
    2730             :         else
    2731        2121 :                 kfree(async);
    2732        2152 : }
    2733             : 
    2734        2152 : int btrfs_async_run_delayed_refs(struct btrfs_root *root,
    2735             :                                  unsigned long count, int wait)
    2736             : {
    2737             :         struct async_delayed_refs *async;
    2738             :         int ret;
    2739             : 
    2740             :         async = kmalloc(sizeof(*async), GFP_NOFS);
    2741        2152 :         if (!async)
    2742             :                 return -ENOMEM;
    2743             : 
    2744        2152 :         async->root = root->fs_info->tree_root;
    2745        2152 :         async->count = count;
    2746        2152 :         async->error = 0;
    2747        2152 :         if (wait)
    2748          31 :                 async->sync = 1;
    2749             :         else
    2750        2121 :                 async->sync = 0;
    2751             :         init_completion(&async->wait);
    2752             : 
    2753        2152 :         btrfs_init_work(&async->work, btrfs_extent_refs_helper,
    2754             :                         delayed_ref_async_start, NULL, NULL);
    2755             : 
    2756        2152 :         btrfs_queue_work(root->fs_info->extent_workers, &async->work);
    2757             : 
    2758        2152 :         if (wait) {
    2759          31 :                 wait_for_completion(&async->wait);
    2760          31 :                 ret = async->error;
    2761          31 :                 kfree(async);
    2762          31 :                 return ret;
    2763             :         }
    2764             :         return 0;
    2765             : }
    2766             : 
    2767             : /*
    2768             :  * this starts processing the delayed reference count updates and
    2769             :  * extent insertions we have queued up so far.  count can be
    2770             :  * 0, which means to process everything in the tree at the start
    2771             :  * of the run (but not newly added entries), or it can be some target
    2772             :  * number you'd like to process.
    2773             :  *
    2774             :  * Returns 0 on success or if called with an aborted transaction
    2775             :  * Returns <0 on error and aborts the transaction
    2776             :  */
    2777       41030 : int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
    2778             :                            struct btrfs_root *root, unsigned long count)
    2779             : {
    2780             :         struct rb_node *node;
    2781             :         struct btrfs_delayed_ref_root *delayed_refs;
    2782             :         struct btrfs_delayed_ref_head *head;
    2783             :         int ret;
    2784             :         int run_all = count == (unsigned long)-1;
    2785             :         int run_most = 0;
    2786             : 
    2787             :         /* We'll clean this up in btrfs_cleanup_transaction */
    2788       41030 :         if (trans->aborted)
    2789             :                 return 0;
    2790             : 
    2791       41030 :         if (root == root->fs_info->extent_root)
    2792       16277 :                 root = root->fs_info->tree_root;
    2793             : 
    2794       41030 :         delayed_refs = &trans->transaction->delayed_refs;
    2795       41030 :         if (count == 0) {
    2796        4378 :                 count = atomic_read(&delayed_refs->num_entries) * 2;
    2797             :                 run_most = 1;
    2798             :         }
    2799             : 
    2800             : again:
    2801             : #ifdef SCRAMBLE_DELAYED_REFS
    2802             :         delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
    2803             : #endif
    2804       41040 :         ret = __btrfs_run_delayed_refs(trans, root, count);
    2805       41041 :         if (ret < 0) {
    2806           0 :                 btrfs_abort_transaction(trans, root, ret);
    2807           0 :                 return ret;
    2808             :         }
    2809             : 
    2810       41041 :         if (run_all) {
    2811       68994 :                 if (!list_empty(&trans->new_bgs))
    2812          22 :                         btrfs_create_pending_block_groups(trans, root);
    2813             : 
    2814             :                 spin_lock(&delayed_refs->lock);
    2815       34497 :                 node = rb_first(&delayed_refs->href_root);
    2816       34497 :                 if (!node) {
    2817             :                         spin_unlock(&delayed_refs->lock);
    2818             :                         goto out;
    2819             :                 }
    2820             :                 count = (unsigned long)-1;
    2821             : 
    2822          10 :                 while (node) {
    2823             :                         head = rb_entry(node, struct btrfs_delayed_ref_head,
    2824             :                                         href_node);
    2825          10 :                         if (btrfs_delayed_ref_is_head(&head->node)) {
    2826             :                                 struct btrfs_delayed_ref_node *ref;
    2827             : 
    2828          10 :                                 ref = &head->node;
    2829          10 :                                 atomic_inc(&ref->refs);
    2830             : 
    2831             :                                 spin_unlock(&delayed_refs->lock);
    2832             :                                 /*
    2833             :                                  * Mutex was contended, block until it's
    2834             :                                  * released and try again
    2835             :                                  */
    2836          10 :                                 mutex_lock(&head->mutex);
    2837          10 :                                 mutex_unlock(&head->mutex);
    2838             : 
    2839          10 :                                 btrfs_put_delayed_ref(ref);
    2840          10 :                                 cond_resched();
    2841          10 :                                 goto again;
    2842             :                         } else {
    2843           0 :                                 WARN_ON(1);
    2844             :                         }
    2845           0 :                         node = rb_next(node);
    2846             :                 }
    2847             :                 spin_unlock(&delayed_refs->lock);
    2848           0 :                 cond_resched();
    2849           0 :                 goto again;
    2850             :         }
    2851             : out:
    2852       41031 :         ret = btrfs_delayed_qgroup_accounting(trans, root->fs_info);
    2853       41031 :         if (ret)
    2854             :                 return ret;
    2855       41031 :         assert_qgroups_uptodate(trans);
    2856       41031 :         return 0;
    2857             : }
    2858             : 
    2859         412 : int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
    2860             :                                 struct btrfs_root *root,
    2861             :                                 u64 bytenr, u64 num_bytes, u64 flags,
    2862             :                                 int level, int is_data)
    2863             : {
    2864             :         struct btrfs_delayed_extent_op *extent_op;
    2865             :         int ret;
    2866             : 
    2867             :         extent_op = btrfs_alloc_delayed_extent_op();
    2868         412 :         if (!extent_op)
    2869             :                 return -ENOMEM;
    2870             : 
    2871         412 :         extent_op->flags_to_set = flags;
    2872         412 :         extent_op->update_flags = 1;
    2873         412 :         extent_op->update_key = 0;
    2874         412 :         extent_op->is_data = is_data ? 1 : 0;
    2875         412 :         extent_op->level = level;
    2876             : 
    2877         412 :         ret = btrfs_add_delayed_extent_op(root->fs_info, trans, bytenr,
    2878             :                                           num_bytes, extent_op);
    2879         412 :         if (ret)
    2880             :                 btrfs_free_delayed_extent_op(extent_op);
    2881         412 :         return ret;
    2882             : }
    2883             : 
    2884        6654 : static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
    2885             :                                       struct btrfs_root *root,
    2886             :                                       struct btrfs_path *path,
    2887             :                                       u64 objectid, u64 offset, u64 bytenr)
    2888             : {
    2889             :         struct btrfs_delayed_ref_head *head;
    2890             :         struct btrfs_delayed_ref_node *ref;
    2891             :         struct btrfs_delayed_data_ref *data_ref;
    2892             :         struct btrfs_delayed_ref_root *delayed_refs;
    2893             :         struct rb_node *node;
    2894             :         int ret = 0;
    2895             : 
    2896        6654 :         delayed_refs = &trans->transaction->delayed_refs;
    2897             :         spin_lock(&delayed_refs->lock);
    2898        6654 :         head = btrfs_find_delayed_ref_head(trans, bytenr);
    2899        6654 :         if (!head) {
    2900             :                 spin_unlock(&delayed_refs->lock);
    2901        6563 :                 return 0;
    2902             :         }
    2903             : 
    2904          91 :         if (!mutex_trylock(&head->mutex)) {
    2905           0 :                 atomic_inc(&head->node.refs);
    2906             :                 spin_unlock(&delayed_refs->lock);
    2907             : 
    2908           0 :                 btrfs_release_path(path);
    2909             : 
    2910             :                 /*
    2911             :                  * Mutex was contended, block until it's released and let
    2912             :                  * caller try again
    2913             :                  */
    2914           0 :                 mutex_lock(&head->mutex);
    2915           0 :                 mutex_unlock(&head->mutex);
    2916           0 :                 btrfs_put_delayed_ref(&head->node);
    2917           0 :                 return -EAGAIN;
    2918             :         }
    2919             :         spin_unlock(&delayed_refs->lock);
    2920             : 
    2921             :         spin_lock(&head->lock);
    2922          91 :         node = rb_first(&head->ref_root);
    2923         270 :         while (node) {
    2924             :                 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
    2925          88 :                 node = rb_next(node);
    2926             : 
    2927             :                 /* If it's a shared ref we know a cross reference exists */
    2928          88 :                 if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
    2929             :                         ret = 1;
    2930             :                         break;
    2931             :                 }
    2932             : 
    2933             :                 data_ref = btrfs_delayed_node_to_data_ref(ref);
    2934             : 
    2935             :                 /*
    2936             :                  * If our ref doesn't match the one we're currently looking at
    2937             :                  * then we have a cross reference.
    2938             :                  */
    2939         176 :                 if (data_ref->root != root->root_key.objectid ||
    2940         176 :                     data_ref->objectid != objectid ||
    2941          88 :                     data_ref->offset != offset) {
    2942             :                         ret = 1;
    2943             :                         break;
    2944             :                 }
    2945             :         }
    2946             :         spin_unlock(&head->lock);
    2947          91 :         mutex_unlock(&head->mutex);
    2948          91 :         return ret;
    2949             : }
    2950             : 
    2951        6754 : static noinline int check_committed_ref(struct btrfs_trans_handle *trans,
    2952             :                                         struct btrfs_root *root,
    2953             :                                         struct btrfs_path *path,
    2954             :                                         u64 objectid, u64 offset, u64 bytenr)
    2955             : {
    2956        6754 :         struct btrfs_root *extent_root = root->fs_info->extent_root;
    2957             :         struct extent_buffer *leaf;
    2958             :         struct btrfs_extent_data_ref *ref;
    2959             :         struct btrfs_extent_inline_ref *iref;
    2960             :         struct btrfs_extent_item *ei;
    2961             :         struct btrfs_key key;
    2962             :         u32 item_size;
    2963             :         int ret;
    2964             : 
    2965        6754 :         key.objectid = bytenr;
    2966        6754 :         key.offset = (u64)-1;
    2967        6754 :         key.type = BTRFS_EXTENT_ITEM_KEY;
    2968             : 
    2969        6754 :         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
    2970        6754 :         if (ret < 0)
    2971             :                 goto out;
    2972        6754 :         BUG_ON(ret == 0); /* Corruption */
    2973             : 
    2974             :         ret = -ENOENT;
    2975        6754 :         if (path->slots[0] == 0)
    2976             :                 goto out;
    2977             : 
    2978        6754 :         path->slots[0]--;
    2979        6754 :         leaf = path->nodes[0];
    2980        6754 :         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
    2981             : 
    2982        6754 :         if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY)
    2983             :                 goto out;
    2984             : 
    2985             :         ret = 1;
    2986        6694 :         item_size = btrfs_item_size_nr(leaf, path->slots[0]);
    2987             : #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
    2988        6694 :         if (item_size < sizeof(*ei)) {
    2989           0 :                 WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0));
    2990             :                 goto out;
    2991             :         }
    2992             : #endif
    2993       13388 :         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
    2994             : 
    2995       13388 :         if (item_size != sizeof(*ei) +
    2996        6694 :             btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
    2997             :                 goto out;
    2998             : 
    2999        6594 :         if (btrfs_extent_generation(leaf, ei) <=
    3000             :             btrfs_root_last_snapshot(&root->root_item))
    3001             :                 goto out;
    3002             : 
    3003        6594 :         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
    3004        6594 :         if (btrfs_extent_inline_ref_type(leaf, iref) !=
    3005             :             BTRFS_EXTENT_DATA_REF_KEY)
    3006             :                 goto out;
    3007             : 
    3008        6594 :         ref = (struct btrfs_extent_data_ref *)(&iref->offset);
    3009        6594 :         if (btrfs_extent_refs(leaf, ei) !=
    3010        6594 :             btrfs_extent_data_ref_count(leaf, ref) ||
    3011             :             btrfs_extent_data_ref_root(leaf, ref) !=
    3012       13188 :             root->root_key.objectid ||
    3013        6594 :             btrfs_extent_data_ref_objectid(leaf, ref) != objectid ||
    3014             :             btrfs_extent_data_ref_offset(leaf, ref) != offset)
    3015             :                 goto out;
    3016             : 
    3017             :         ret = 0;
    3018             : out:
    3019        6754 :         return ret;
    3020             : }
    3021             : 
    3022        6754 : int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
    3023             :                           struct btrfs_root *root,
    3024             :                           u64 objectid, u64 offset, u64 bytenr)
    3025             : {
    3026             :         struct btrfs_path *path;
    3027             :         int ret;
    3028             :         int ret2;
    3029             : 
    3030        6754 :         path = btrfs_alloc_path();
    3031        6754 :         if (!path)
    3032             :                 return -ENOENT;
    3033             : 
    3034             :         do {
    3035        6754 :                 ret = check_committed_ref(trans, root, path, objectid,
    3036             :                                           offset, bytenr);
    3037        6754 :                 if (ret && ret != -ENOENT)
    3038             :                         goto out;
    3039             : 
    3040        6654 :                 ret2 = check_delayed_ref(trans, root, path, objectid,
    3041             :                                          offset, bytenr);
    3042        6654 :         } while (ret2 == -EAGAIN);
    3043             : 
    3044        6654 :         if (ret2 && ret2 != -ENOENT) {
    3045             :                 ret = ret2;
    3046             :                 goto out;
    3047             :         }
    3048             : 
    3049        6654 :         if (ret != -ENOENT || ret2 != -ENOENT)
    3050             :                 ret = 0;
    3051             : out:
    3052        6754 :         btrfs_free_path(path);
    3053        6754 :         if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
    3054        1021 :                 WARN_ON(ret > 0);
    3055        6754 :         return ret;
    3056             : }
    3057             : 
    3058        2201 : static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
    3059        3630 :                            struct btrfs_root *root,
    3060        2201 :                            struct extent_buffer *buf,
    3061             :                            int full_backref, int inc)
    3062             : {
    3063             :         u64 bytenr;
    3064             :         u64 num_bytes;
    3065             :         u64 parent;
    3066             :         u64 ref_root;
    3067             :         u32 nritems;
    3068             :         struct btrfs_key key;
    3069             :         struct btrfs_file_extent_item *fi;
    3070             :         int i;
    3071             :         int level;
    3072             :         int ret = 0;
    3073             :         int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
    3074             :                             u64, u64, u64, u64, u64, u64, int);
    3075             : 
    3076             : #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
    3077             :         if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state)))
    3078             :                 return 0;
    3079             : #endif
    3080             :         ref_root = btrfs_header_owner(buf);
    3081             :         nritems = btrfs_header_nritems(buf);
    3082        2201 :         level = btrfs_header_level(buf);
    3083             : 
    3084        2201 :         if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state) && level == 0)
    3085             :                 return 0;
    3086             : 
    3087        2201 :         if (inc)
    3088             :                 process_func = btrfs_inc_extent_ref;
    3089             :         else
    3090             :                 process_func = btrfs_free_extent;
    3091             : 
    3092        2201 :         if (full_backref)
    3093        1654 :                 parent = buf->start;
    3094             :         else
    3095             :                 parent = 0;
    3096             : 
    3097       61341 :         for (i = 0; i < nritems; i++) {
    3098       59140 :                 if (level == 0) {
    3099       55510 :                         btrfs_item_key_to_cpu(buf, &key, i);
    3100       55510 :                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
    3101       30560 :                                 continue;
    3102       24950 :                         fi = btrfs_item_ptr(buf, i,
    3103             :                                             struct btrfs_file_extent_item);
    3104       24950 :                         if (btrfs_file_extent_type(buf, fi) ==
    3105             :                             BTRFS_FILE_EXTENT_INLINE)
    3106         844 :                                 continue;
    3107             :                         bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
    3108       24106 :                         if (bytenr == 0)
    3109        3274 :                                 continue;
    3110             : 
    3111             :                         num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
    3112       41664 :                         key.offset -= btrfs_file_extent_offset(buf, fi);
    3113       20832 :                         ret = process_func(trans, root, bytenr, num_bytes,
    3114             :                                            parent, ref_root, key.objectid,
    3115             :                                            key.offset, 1);
    3116       20832 :                         if (ret)
    3117             :                                 goto fail;
    3118             :                 } else {
    3119             :                         bytenr = btrfs_node_blockptr(buf, i);
    3120        3630 :                         num_bytes = btrfs_level_size(root, level - 1);
    3121        3630 :                         ret = process_func(trans, root, bytenr, num_bytes,
    3122        3630 :                                            parent, ref_root, level - 1, 0,
    3123             :                                            1);
    3124        3630 :                         if (ret)
    3125             :                                 goto fail;
    3126             :                 }
    3127             :         }
    3128             :         return 0;
    3129             : fail:
    3130           0 :         return ret;
    3131             : }
    3132             : 
    3133        1396 : int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
    3134             :                   struct extent_buffer *buf, int full_backref)
    3135             : {
    3136        1396 :         return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
    3137             : }
    3138             : 
    3139         341 : int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
    3140             :                   struct extent_buffer *buf, int full_backref)
    3141             : {
    3142         794 :         return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
    3143             : }
    3144             : 
    3145        6457 : static int write_one_cache_group(struct btrfs_trans_handle *trans,
    3146             :                                  struct btrfs_root *root,
    3147             :                                  struct btrfs_path *path,
    3148             :                                  struct btrfs_block_group_cache *cache)
    3149             : {
    3150             :         int ret;
    3151        6457 :         struct btrfs_root *extent_root = root->fs_info->extent_root;
    3152             :         unsigned long bi;
    3153             :         struct extent_buffer *leaf;
    3154             : 
    3155        6457 :         ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
    3156        6457 :         if (ret < 0)
    3157             :                 goto fail;
    3158        6457 :         BUG_ON(ret); /* Corruption */
    3159             : 
    3160        6457 :         leaf = path->nodes[0];
    3161       12914 :         bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
    3162        6457 :         write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
    3163        6457 :         btrfs_mark_buffer_dirty(leaf);
    3164        6457 :         btrfs_release_path(path);
    3165             : fail:
    3166        6457 :         if (ret) {
    3167           0 :                 btrfs_abort_transaction(trans, root, ret);
    3168           0 :                 return ret;
    3169             :         }
    3170             :         return 0;
    3171             : 
    3172             : }
    3173             : 
    3174             : static struct btrfs_block_group_cache *
    3175      254724 : next_block_group(struct btrfs_root *root,
    3176             :                  struct btrfs_block_group_cache *cache)
    3177             : {
    3178             :         struct rb_node *node;
    3179      254724 :         spin_lock(&root->fs_info->block_group_cache_lock);
    3180      254724 :         node = rb_next(&cache->cache_node);
    3181      254724 :         btrfs_put_block_group(cache);
    3182      254724 :         if (node) {
    3183      220280 :                 cache = rb_entry(node, struct btrfs_block_group_cache,
    3184             :                                  cache_node);
    3185             :                 btrfs_get_block_group(cache);
    3186             :         } else
    3187             :                 cache = NULL;
    3188      254724 :         spin_unlock(&root->fs_info->block_group_cache_lock);
    3189      254724 :         return cache;
    3190             : }
    3191             : 
    3192        5895 : static int cache_save_setup(struct btrfs_block_group_cache *block_group,
    3193             :                             struct btrfs_trans_handle *trans,
    3194             :                             struct btrfs_path *path)
    3195             : {
    3196        5895 :         struct btrfs_root *root = block_group->fs_info->tree_root;
    3197        4088 :         struct inode *inode = NULL;
    3198        5895 :         u64 alloc_hint = 0;
    3199             :         int dcs = BTRFS_DC_ERROR;
    3200             :         int num_pages = 0;
    3201             :         int retries = 0;
    3202             :         int ret = 0;
    3203             : 
    3204             :         /*
    3205             :          * If this block group is smaller than 100 megs don't bother caching the
    3206             :          * block group.
    3207             :          */
    3208        5895 :         if (block_group->key.offset < (100 * 1024 * 1024)) {
    3209             :                 spin_lock(&block_group->lock);
    3210        1807 :                 block_group->disk_cache_state = BTRFS_DC_WRITTEN;
    3211             :                 spin_unlock(&block_group->lock);
    3212        1807 :                 return 0;
    3213             :         }
    3214             : 
    3215             : again:
    3216        4267 :         inode = lookup_free_space_inode(root, block_group, path);
    3217        4446 :         if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
    3218           0 :                 ret = PTR_ERR(inode);
    3219           0 :                 btrfs_release_path(path);
    3220           0 :                 goto out;
    3221             :         }
    3222             : 
    3223        4267 :         if (IS_ERR(inode)) {
    3224         179 :                 BUG_ON(retries);
    3225         179 :                 retries++;
    3226             : 
    3227         179 :                 if (block_group->ro)
    3228             :                         goto out_free;
    3229             : 
    3230         179 :                 ret = create_free_space_inode(root, trans, block_group, path);
    3231         179 :                 if (ret)
    3232             :                         goto out_free;
    3233             :                 goto again;
    3234             :         }
    3235             : 
    3236             :         /* We've already setup this transaction, go ahead and exit */
    3237        4170 :         if (block_group->cache_generation == trans->transid &&
    3238             :             i_size_read(inode)) {
    3239             :                 dcs = BTRFS_DC_SETUP;
    3240             :                 goto out_put;
    3241             :         }
    3242             : 
    3243             :         /*
    3244             :          * We want to set the generation to 0, that way if anything goes wrong
    3245             :          * from here on out we know not to trust this cache when we load up next
    3246             :          * time.
    3247             :          */
    3248        4006 :         BTRFS_I(inode)->generation = 0;
    3249        4006 :         ret = btrfs_update_inode(trans, root, inode);
    3250        4006 :         WARN_ON(ret);
    3251             : 
    3252        4006 :         if (i_size_read(inode) > 0) {
    3253        3760 :                 ret = btrfs_check_trunc_cache_free_space(root,
    3254        3760 :                                         &root->fs_info->global_block_rsv);
    3255        3760 :                 if (ret)
    3256             :                         goto out_put;
    3257             : 
    3258        3760 :                 ret = btrfs_truncate_free_space_cache(root, trans, inode);
    3259        3760 :                 if (ret)
    3260             :                         goto out_put;
    3261             :         }
    3262             : 
    3263             :         spin_lock(&block_group->lock);
    3264        8005 :         if (block_group->cached != BTRFS_CACHE_FINISHED ||
    3265        7998 :             !btrfs_test_opt(root, SPACE_CACHE) ||
    3266        3999 :             block_group->delalloc_bytes) {
    3267             :                 /*
    3268             :                  * don't bother trying to write stuff out _if_
    3269             :                  * a) we're not cached,
    3270             :                  * b) we're with nospace_cache mount option.
    3271             :                  */
    3272             :                 dcs = BTRFS_DC_WRITTEN;
    3273             :                 spin_unlock(&block_group->lock);
    3274             :                 goto out_put;
    3275             :         }
    3276             :         spin_unlock(&block_group->lock);
    3277             : 
    3278             :         /*
    3279             :          * Try to preallocate enough space based on how big the block group is.
    3280             :          * Keep in mind this has to include any pinned space which could end up
    3281             :          * taking up quite a bit since it's not folded into the other space
    3282             :          * cache.
    3283             :          */
    3284        7950 :         num_pages = (int)div64_u64(block_group->key.offset, 256 * 1024 * 1024);
    3285        3975 :         if (!num_pages)
    3286             :                 num_pages = 1;
    3287             : 
    3288        3975 :         num_pages *= 16;
    3289        3975 :         num_pages *= PAGE_CACHE_SIZE;
    3290             : 
    3291        3975 :         ret = btrfs_check_data_free_space(inode, num_pages);
    3292        3975 :         if (ret)
    3293             :                 goto out_put;
    3294             : 
    3295        3975 :         ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
    3296             :                                               num_pages, num_pages,
    3297             :                                               &alloc_hint);
    3298        3975 :         if (!ret)
    3299             :                 dcs = BTRFS_DC_SETUP;
    3300        3975 :         btrfs_free_reserved_data_space(inode, num_pages);
    3301             : 
    3302             : out_put:
    3303        4088 :         iput(inode);
    3304             : out_free:
    3305        4088 :         btrfs_release_path(path);
    3306             : out:
    3307             :         spin_lock(&block_group->lock);
    3308        4088 :         if (!ret && dcs == BTRFS_DC_SETUP)
    3309        4057 :                 block_group->cache_generation = trans->transid;
    3310        4088 :         block_group->disk_cache_state = dcs;
    3311             :         spin_unlock(&block_group->lock);
    3312             : 
    3313        4088 :         return ret;
    3314             : }
    3315             : 
    3316        9663 : int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
    3317             :                                    struct btrfs_root *root)
    3318             : {
    3319             :         struct btrfs_block_group_cache *cache;
    3320             :         int err = 0;
    3321             :         struct btrfs_path *path;
    3322             :         u64 last = 0;
    3323             : 
    3324        9663 :         path = btrfs_alloc_path();
    3325        9663 :         if (!path)
    3326             :                 return -ENOMEM;
    3327             : 
    3328             : again:
    3329             :         while (1) {
    3330       20600 :                 cache = btrfs_lookup_first_block_group(root->fs_info, last);
    3331      131656 :                 while (cache) {
    3332       96351 :                         if (cache->disk_cache_state == BTRFS_DC_CLEAR)
    3333             :                                 break;
    3334       90456 :                         cache = next_block_group(root, cache);
    3335             :                 }
    3336       20600 :                 if (!cache) {
    3337       14705 :                         if (last == 0)
    3338             :                                 break;
    3339             :                         last = 0;
    3340        3570 :                         continue;
    3341             :                 }
    3342        5895 :                 err = cache_save_setup(cache, trans, path);
    3343        5895 :                 last = cache->key.objectid + cache->key.offset;
    3344        5895 :                 btrfs_put_block_group(cache);
    3345             :         }
    3346             : 
    3347             :         while (1) {
    3348       20434 :                 if (last == 0) {
    3349       13977 :                         err = btrfs_run_delayed_refs(trans, root,
    3350             :                                                      (unsigned long)-1);
    3351       13977 :                         if (err) /* File system offline */
    3352             :                                 goto out;
    3353             :                 }
    3354             : 
    3355       20434 :                 cache = btrfs_lookup_first_block_group(root->fs_info, last);
    3356      126537 :                 while (cache) {
    3357       93588 :                         if (cache->disk_cache_state == BTRFS_DC_CLEAR) {
    3358        1462 :                                 btrfs_put_block_group(cache);
    3359        1462 :                                 goto again;
    3360             :                         }
    3361             : 
    3362       92126 :                         if (cache->dirty)
    3363             :                                 break;
    3364       85669 :                         cache = next_block_group(root, cache);
    3365             :                 }
    3366       18972 :                 if (!cache) {
    3367       12515 :                         if (last == 0)
    3368             :                                 break;
    3369             :                         last = 0;
    3370        2842 :                         continue;
    3371             :                 }
    3372             : 
    3373        6457 :                 if (cache->disk_cache_state == BTRFS_DC_SETUP)
    3374        4057 :                         cache->disk_cache_state = BTRFS_DC_NEED_WRITE;
    3375        6457 :                 cache->dirty = 0;
    3376        6457 :                 last = cache->key.objectid + cache->key.offset;
    3377             : 
    3378        6457 :                 err = write_one_cache_group(trans, root, path, cache);
    3379        6457 :                 btrfs_put_block_group(cache);
    3380        6457 :                 if (err) /* File system offline */
    3381             :                         goto out;
    3382             :         }
    3383             : 
    3384             :         while (1) {
    3385             :                 /*
    3386             :                  * I don't think this is needed since we're just marking our
    3387             :                  * preallocated extent as written, but just in case it can't
    3388             :                  * hurt.
    3389             :                  */
    3390       15883 :                 if (last == 0) {
    3391       11826 :                         err = btrfs_run_delayed_refs(trans, root,
    3392             :                                                      (unsigned long)-1);
    3393       11826 :                         if (err) /* File system offline */
    3394             :                                 goto out;
    3395             :                 }
    3396             : 
    3397       15883 :                 cache = btrfs_lookup_first_block_group(root->fs_info, last);
    3398      108462 :                 while (cache) {
    3399             :                         /*
    3400             :                          * Really this shouldn't happen, but it could if we
    3401             :                          * couldn't write the entire preallocated extent and
    3402             :                          * splitting the extent resulted in a new block.
    3403             :                          */
    3404       80763 :                         if (cache->dirty) {
    3405          10 :                                 btrfs_put_block_group(cache);
    3406          10 :                                 goto again;
    3407             :                         }
    3408       80753 :                         if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
    3409             :                                 break;
    3410       76696 :                         cache = next_block_group(root, cache);
    3411             :                 }
    3412       15873 :                 if (!cache) {
    3413       11816 :                         if (last == 0)
    3414             :                                 break;
    3415             :                         last = 0;
    3416        2153 :                         continue;
    3417             :                 }
    3418             : 
    3419        4057 :                 err = btrfs_write_out_cache(root, trans, cache, path);
    3420             : 
    3421             :                 /*
    3422             :                  * If we didn't have an error then the cache state is still
    3423             :                  * NEED_WRITE, so we can set it to WRITTEN.
    3424             :                  */
    3425        4057 :                 if (!err && cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
    3426        4057 :                         cache->disk_cache_state = BTRFS_DC_WRITTEN;
    3427        4057 :                 last = cache->key.objectid + cache->key.offset;
    3428        4057 :                 btrfs_put_block_group(cache);
    3429             :         }
    3430             : out:
    3431             : 
    3432        9663 :         btrfs_free_path(path);
    3433        9663 :         return err;
    3434             : }
    3435             : 
    3436        6754 : int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
    3437             : {
    3438             :         struct btrfs_block_group_cache *block_group;
    3439             :         int readonly = 0;
    3440             : 
    3441        6754 :         block_group = btrfs_lookup_block_group(root->fs_info, bytenr);
    3442        6754 :         if (!block_group || block_group->ro)
    3443             :                 readonly = 1;
    3444        6754 :         if (block_group)
    3445        6754 :                 btrfs_put_block_group(block_group);
    3446        6754 :         return readonly;
    3447             : }
    3448             : 
    3449         657 : static const char *alloc_name(u64 flags)
    3450             : {
    3451         657 :         switch (flags) {
    3452             :         case BTRFS_BLOCK_GROUP_METADATA|BTRFS_BLOCK_GROUP_DATA:
    3453             :                 return "mixed";
    3454             :         case BTRFS_BLOCK_GROUP_METADATA:
    3455         215 :                 return "metadata";
    3456             :         case BTRFS_BLOCK_GROUP_DATA:
    3457         215 :                 return "data";
    3458             :         case BTRFS_BLOCK_GROUP_SYSTEM:
    3459         221 :                 return "system";
    3460             :         default:
    3461           0 :                 WARN_ON(1);
    3462           0 :                 return "invalid-combination";
    3463             :         };
    3464             : }
    3465             : 
    3466        1885 : static int update_space_info(struct btrfs_fs_info *info, u64 flags,
    3467             :                              u64 total_bytes, u64 bytes_used,
    3468             :                              struct btrfs_space_info **space_info)
    3469             : {
    3470             :         struct btrfs_space_info *found;
    3471             :         int i;
    3472             :         int factor;
    3473             :         int ret;
    3474             : 
    3475        1885 :         if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
    3476             :                      BTRFS_BLOCK_GROUP_RAID10))
    3477             :                 factor = 2;
    3478             :         else
    3479             :                 factor = 1;
    3480             : 
    3481             :         found = __find_space_info(info, flags);
    3482        1885 :         if (found) {
    3483             :                 spin_lock(&found->lock);
    3484        1228 :                 found->total_bytes += total_bytes;
    3485        1228 :                 found->disk_total += total_bytes * factor;
    3486        1228 :                 found->bytes_used += bytes_used;
    3487        1228 :                 found->disk_used += bytes_used * factor;
    3488        1228 :                 found->full = 0;
    3489             :                 spin_unlock(&found->lock);
    3490        1228 :                 *space_info = found;
    3491        1228 :                 return 0;
    3492             :         }
    3493         657 :         found = kzalloc(sizeof(*found), GFP_NOFS);
    3494         657 :         if (!found)
    3495             :                 return -ENOMEM;
    3496             : 
    3497         657 :         ret = percpu_counter_init(&found->total_bytes_pinned, 0);
    3498         657 :         if (ret) {
    3499           0 :                 kfree(found);
    3500           0 :                 return ret;
    3501             :         }
    3502             : 
    3503        4599 :         for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
    3504        4599 :                 INIT_LIST_HEAD(&found->block_groups[i]);
    3505         657 :         init_rwsem(&found->groups_sem);
    3506         657 :         spin_lock_init(&found->lock);
    3507         657 :         found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
    3508         657 :         found->total_bytes = total_bytes;
    3509         657 :         found->disk_total = total_bytes * factor;
    3510         657 :         found->bytes_used = bytes_used;
    3511         657 :         found->disk_used = bytes_used * factor;
    3512         657 :         found->bytes_pinned = 0;
    3513         657 :         found->bytes_reserved = 0;
    3514         657 :         found->bytes_readonly = 0;
    3515         657 :         found->bytes_may_use = 0;
    3516         657 :         found->full = 0;
    3517         657 :         found->force_alloc = CHUNK_ALLOC_NO_FORCE;
    3518         657 :         found->chunk_alloc = 0;
    3519         657 :         found->flush = 0;
    3520         657 :         init_waitqueue_head(&found->wait);
    3521             : 
    3522         657 :         ret = kobject_init_and_add(&found->kobj, &space_info_ktype,
    3523             :                                     info->space_info_kobj, "%s",
    3524             :                                     alloc_name(found->flags));
    3525         657 :         if (ret) {
    3526           0 :                 kfree(found);
    3527           0 :                 return ret;
    3528             :         }
    3529             : 
    3530         657 :         *space_info = found;
    3531         657 :         list_add_rcu(&found->list, &info->space_info);
    3532         657 :         if (flags & BTRFS_BLOCK_GROUP_DATA)
    3533         221 :                 info->data_sinfo = found;
    3534             : 
    3535         657 :         return ret;
    3536             : }
    3537             : 
    3538        1228 : static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
    3539             : {
    3540        1228 :         u64 extra_flags = chunk_to_extended(flags) &
    3541             :                                 BTRFS_EXTENDED_PROFILE_MASK;
    3542             : 
    3543             :         write_seqlock(&fs_info->profiles_lock);
    3544        1228 :         if (flags & BTRFS_BLOCK_GROUP_DATA)
    3545         325 :                 fs_info->avail_data_alloc_bits |= extra_flags;
    3546        1228 :         if (flags & BTRFS_BLOCK_GROUP_METADATA)
    3547         466 :                 fs_info->avail_metadata_alloc_bits |= extra_flags;
    3548        1228 :         if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
    3549         453 :                 fs_info->avail_system_alloc_bits |= extra_flags;
    3550             :         write_sequnlock(&fs_info->profiles_lock);
    3551        1228 : }
    3552             : 
    3553             : /*
    3554             :  * returns target flags in extended format or 0 if restripe for this
    3555             :  * chunk_type is not in progress
    3556             :  *
    3557             :  * should be called with either volume_mutex or balance_lock held
    3558             :  */
    3559      166920 : static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags)
    3560             : {
    3561      166920 :         struct btrfs_balance_control *bctl = fs_info->balance_ctl;
    3562             :         u64 target = 0;
    3563             : 
    3564      166920 :         if (!bctl)
    3565             :                 return 0;
    3566             : 
    3567        8207 :         if (flags & BTRFS_BLOCK_GROUP_DATA &&
    3568        1885 :             bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) {
    3569           0 :                 target = BTRFS_BLOCK_GROUP_DATA | bctl->data.target;
    3570        6501 :         } else if (flags & BTRFS_BLOCK_GROUP_SYSTEM &&
    3571         179 :                    bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
    3572           0 :                 target = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target;
    3573       10580 :         } else if (flags & BTRFS_BLOCK_GROUP_METADATA &&
    3574        4258 :                    bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) {
    3575           0 :                 target = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target;
    3576             :         }
    3577             : 
    3578             :         return target;
    3579             : }
    3580             : 
    3581             : /*
    3582             :  * @flags: available profiles in extended format (see ctree.h)
    3583             :  *
    3584             :  * Returns reduced profile in chunk format.  If profile changing is in
    3585             :  * progress (either running or paused) picks the target profile (if it's
    3586             :  * already available), otherwise falls back to plain reducing.
    3587             :  */
    3588      166789 : static u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
    3589             : {
    3590      333583 :         u64 num_devices = root->fs_info->fs_devices->rw_devices;
    3591             :         u64 target;
    3592             :         u64 tmp;
    3593             : 
    3594             :         /*
    3595             :          * see if restripe for this chunk_type is in progress, if so
    3596             :          * try to reduce to the target profile
    3597             :          */
    3598             :         spin_lock(&root->fs_info->balance_lock);
    3599      333588 :         target = get_restripe_target(root->fs_info, flags);
    3600      166793 :         if (target) {
    3601             :                 /* pick target profile only if it's already available */
    3602           0 :                 if ((flags & target) & BTRFS_EXTENDED_PROFILE_MASK) {
    3603             :                         spin_unlock(&root->fs_info->balance_lock);
    3604             :                         return extended_to_chunk(target);
    3605             :                 }
    3606             :         }
    3607             :         spin_unlock(&root->fs_info->balance_lock);
    3608             : 
    3609             :         /* First, mask out the RAID levels which aren't possible */
    3610      166793 :         if (num_devices == 1)
    3611      162382 :                 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0 |
    3612             :                            BTRFS_BLOCK_GROUP_RAID5);
    3613      166793 :         if (num_devices < 3)
    3614      166697 :                 flags &= ~BTRFS_BLOCK_GROUP_RAID6;
    3615      166793 :         if (num_devices < 4)
    3616      166697 :                 flags &= ~BTRFS_BLOCK_GROUP_RAID10;
    3617             : 
    3618      166793 :         tmp = flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID0 |
    3619             :                        BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID5 |
    3620             :                        BTRFS_BLOCK_GROUP_RAID6 | BTRFS_BLOCK_GROUP_RAID10);
    3621      166793 :         flags &= ~tmp;
    3622             : 
    3623      166793 :         if (tmp & BTRFS_BLOCK_GROUP_RAID6)
    3624             :                 tmp = BTRFS_BLOCK_GROUP_RAID6;
    3625      166779 :         else if (tmp & BTRFS_BLOCK_GROUP_RAID5)
    3626             :                 tmp = BTRFS_BLOCK_GROUP_RAID5;
    3627      166763 :         else if (tmp & BTRFS_BLOCK_GROUP_RAID10)
    3628             :                 tmp = BTRFS_BLOCK_GROUP_RAID10;
    3629      166749 :         else if (tmp & BTRFS_BLOCK_GROUP_RAID1)
    3630             :                 tmp = BTRFS_BLOCK_GROUP_RAID1;
    3631      163452 :         else if (tmp & BTRFS_BLOCK_GROUP_RAID0)
    3632             :                 tmp = BTRFS_BLOCK_GROUP_RAID0;
    3633             : 
    3634      166793 :         return extended_to_chunk(flags | tmp);
    3635             : }
    3636             : 
    3637      166787 : static u64 get_alloc_profile(struct btrfs_root *root, u64 orig_flags)
    3638             : {
    3639             :         unsigned seq;
    3640             :         u64 flags;
    3641             : 
    3642             :         do {
    3643             :                 flags = orig_flags;
    3644      166786 :                 seq = read_seqbegin(&root->fs_info->profiles_lock);
    3645             : 
    3646      166787 :                 if (flags & BTRFS_BLOCK_GROUP_DATA)
    3647      102114 :                         flags |= root->fs_info->avail_data_alloc_bits;
    3648       64673 :                 else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
    3649         378 :                         flags |= root->fs_info->avail_system_alloc_bits;
    3650       64295 :                 else if (flags & BTRFS_BLOCK_GROUP_METADATA)
    3651       64295 :                         flags |= root->fs_info->avail_metadata_alloc_bits;
    3652      333575 :         } while (read_seqretry(&root->fs_info->profiles_lock, seq));
    3653             : 
    3654      166789 :         return btrfs_reduce_alloc_profile(root, flags);
    3655             : }
    3656             : 
    3657      166086 : u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
    3658             : {
    3659             :         u64 flags;
    3660             :         u64 ret;
    3661             : 
    3662      166086 :         if (data)
    3663             :                 flags = BTRFS_BLOCK_GROUP_DATA;
    3664       64193 :         else if (root == root->fs_info->chunk_root)
    3665             :                 flags = BTRFS_BLOCK_GROUP_SYSTEM;
    3666             :         else
    3667             :                 flags = BTRFS_BLOCK_GROUP_METADATA;
    3668             : 
    3669      166086 :         ret = get_alloc_profile(root, flags);
    3670      166090 :         return ret;
    3671             : }
    3672             : 
    3673             : /*
    3674             :  * This will check the space that the inode allocates from to make sure we have
    3675             :  * enough space for bytes.
    3676             :  */
    3677      159345 : int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
    3678             : {
    3679             :         struct btrfs_space_info *data_sinfo;
    3680      159345 :         struct btrfs_root *root = BTRFS_I(inode)->root;
    3681      159345 :         struct btrfs_fs_info *fs_info = root->fs_info;
    3682             :         u64 used;
    3683             :         int ret = 0, committed = 0, alloc_chunk = 1;
    3684             : 
    3685             :         /* make sure bytes are sectorsize aligned */
    3686      159345 :         bytes = ALIGN(bytes, root->sectorsize);
    3687             : 
    3688      159345 :         if (btrfs_is_free_space_inode(inode)) {
    3689             :                 committed = 1;
    3690             :                 ASSERT(current->journal_info);
    3691             :         }
    3692             : 
    3693      159347 :         data_sinfo = fs_info->data_sinfo;
    3694      159347 :         if (!data_sinfo)
    3695             :                 goto alloc;
    3696             : 
    3697             : again:
    3698             :         /* make sure we have enough space to handle the data first */
    3699             :         spin_lock(&data_sinfo->lock);
    3700      478191 :         used = data_sinfo->bytes_used + data_sinfo->bytes_reserved +
    3701      318794 :                 data_sinfo->bytes_pinned + data_sinfo->bytes_readonly +
    3702      159397 :                 data_sinfo->bytes_may_use;
    3703             : 
    3704      159397 :         if (used + bytes > data_sinfo->total_bytes) {
    3705             :                 struct btrfs_trans_handle *trans;
    3706             : 
    3707             :                 /*
    3708             :                  * if we don't have enough free bytes in this space then we need
    3709             :                  * to alloc a new chunk.
    3710             :                  */
    3711          39 :                 if (!data_sinfo->full && alloc_chunk) {
    3712             :                         u64 alloc_target;
    3713             : 
    3714          39 :                         data_sinfo->force_alloc = CHUNK_ALLOC_FORCE;
    3715             :                         spin_unlock(&data_sinfo->lock);
    3716             : alloc:
    3717          37 :                         alloc_target = btrfs_get_alloc_profile(root, 1);
    3718             :                         /*
    3719             :                          * It is ugly that we don't call nolock join
    3720             :                          * transaction for the free space inode case here.
    3721             :                          * But it is safe because we only do the data space
    3722             :                          * reservation for the free space cache in the
    3723             :                          * transaction context; the common join transaction just
    3724             :                          * increases the counter of the current transaction
    3725             :                          * handle and doesn't try to acquire the trans_lock of
    3726             :                          * the fs.
    3727             :                          */
    3728          39 :                         trans = btrfs_join_transaction(root);
    3729          39 :                         if (IS_ERR(trans))
    3730           0 :                                 return PTR_ERR(trans);
    3731             : 
    3732          39 :                         ret = do_chunk_alloc(trans, root->fs_info->extent_root,
    3733             :                                              alloc_target,
    3734             :                                              CHUNK_ALLOC_NO_FORCE);
    3735          39 :                         btrfs_end_transaction(trans, root);
    3736          39 :                         if (ret < 0) {
    3737           0 :                                 if (ret != -ENOSPC)
    3738             :                                         return ret;
    3739             :                                 else
    3740             :                                         goto commit_trans;
    3741             :                         }
    3742             : 
    3743          39 :                         if (!data_sinfo)
    3744           0 :                                 data_sinfo = fs_info->data_sinfo;
    3745             : 
    3746             :                         goto again;
    3747             :                 }
    3748             : 
    3749             :                 /*
    3750             :                  * If we don't have enough pinned space to deal with this
    3751             :                  * allocation don't bother committing the transaction.
    3752             :                  */
    3753           0 :                 if (percpu_counter_compare(&data_sinfo->total_bytes_pinned,
    3754             :                                            bytes) < 0)
    3755             :                         committed = 1;
    3756             :                 spin_unlock(&data_sinfo->lock);
    3757             : 
    3758             :                 /* commit the current transaction and try again */
    3759             : commit_trans:
    3760           0 :                 if (!committed &&
    3761           0 :                     !atomic_read(&root->fs_info->open_ioctl_trans)) {
    3762             :                         committed = 1;
    3763             : 
    3764           0 :                         trans = btrfs_join_transaction(root);
    3765           0 :                         if (IS_ERR(trans))
    3766           0 :                                 return PTR_ERR(trans);
    3767           0 :                         ret = btrfs_commit_transaction(trans, root);
    3768           0 :                         if (ret)
    3769             :                                 return ret;
    3770             :                         goto again;
    3771             :                 }
    3772             : 
    3773           0 :                 trace_btrfs_space_reservation(root->fs_info,
    3774             :                                               "space_info:enospc",
    3775             :                                               data_sinfo->flags, bytes, 1);
    3776           0 :                 return -ENOSPC;
    3777             :         }
    3778      159358 :         data_sinfo->bytes_may_use += bytes;
    3779      159358 :         trace_btrfs_space_reservation(root->fs_info, "space_info",
    3780             :                                       data_sinfo->flags, bytes, 1);
    3781             :         spin_unlock(&data_sinfo->lock);
    3782             : 
    3783      159358 :         return 0;
    3784             : }
    3785             : 
    3786             : /*
    3787             :  * Called if we need to clear a data reservation for this inode.
    3788             :  */
    3789       90628 : void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
    3790             : {
    3791       90628 :         struct btrfs_root *root = BTRFS_I(inode)->root;
    3792             :         struct btrfs_space_info *data_sinfo;
    3793             : 
    3794             :         /* make sure bytes are sectorsize aligned */
    3795       90628 :         bytes = ALIGN(bytes, root->sectorsize);
    3796             : 
    3797       90628 :         data_sinfo = root->fs_info->data_sinfo;
    3798             :         spin_lock(&data_sinfo->lock);
    3799       90642 :         WARN_ON(data_sinfo->bytes_may_use < bytes);
    3800       90642 :         data_sinfo->bytes_may_use -= bytes;
    3801       90642 :         trace_btrfs_space_reservation(root->fs_info, "space_info",
    3802             :                                       data_sinfo->flags, bytes, 0);
    3803             :         spin_unlock(&data_sinfo->lock);
    3804       90642 : }
    3805             : 
    3806             : static void force_metadata_allocation(struct btrfs_fs_info *info)
    3807             : {
    3808             :         struct list_head *head = &info->space_info;
    3809             :         struct btrfs_space_info *found;
    3810             : 
    3811             :         rcu_read_lock();
    3812           0 :         list_for_each_entry_rcu(found, head, list) {
    3813           0 :                 if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
    3814           0 :                         found->force_alloc = CHUNK_ALLOC_FORCE;
    3815             :         }
    3816             :         rcu_read_unlock();
    3817             : }
    3818             : 
    3819             : static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global)
    3820             : {
    3821        5307 :         return (global->size << 1);
    3822             : }
    3823             : 
    3824          87 : static int should_alloc_chunk(struct btrfs_root *root,
    3825             :                               struct btrfs_space_info *sinfo, int force)
    3826             : {
    3827          90 :         struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
    3828          87 :         u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
    3829          87 :         u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved;
    3830             :         u64 thresh;
    3831             : 
    3832          87 :         if (force == CHUNK_ALLOC_FORCE)
    3833             :                 return 1;
    3834             : 
    3835             :         /*
    3836             :          * We need to take into account the global rsv because for all intents
    3837             :          * and purposes it's used space.  Don't worry about locking the
    3838             :          * global_rsv, it doesn't change except when the transaction commits.
    3839             :          */
    3840           3 :         if (sinfo->flags & BTRFS_BLOCK_GROUP_METADATA)
    3841           3 :                 num_allocated += calc_global_rsv_need_space(global_rsv);
    3842             : 
    3843             :         /*
    3844             :          * in limited mode, we want to have some free space up to
    3845             :          * about 1% of the FS size.
    3846             :          */
    3847           3 :         if (force == CHUNK_ALLOC_LIMITED) {
    3848           0 :                 thresh = btrfs_super_total_bytes(root->fs_info->super_copy);
    3849           0 :                 thresh = max_t(u64, 64 * 1024 * 1024,
    3850             :                                div_factor_fine(thresh, 1));
    3851             : 
    3852           0 :                 if (num_bytes - num_allocated < thresh)
    3853             :                         return 1;
    3854             :         }
    3855             : 
    3856           6 :         if (num_allocated + 2 * 1024 * 1024 < div_factor(num_bytes, 8))
    3857             :                 return 0;
    3858             :         return 1;
    3859             : }
    3860             : 
    3861          87 : static u64 get_system_chunk_thresh(struct btrfs_root *root, u64 type)
    3862             : {
    3863             :         u64 num_dev;
    3864             : 
    3865          87 :         if (type & (BTRFS_BLOCK_GROUP_RAID10 |
    3866             :                     BTRFS_BLOCK_GROUP_RAID0 |
    3867             :                     BTRFS_BLOCK_GROUP_RAID5 |
    3868             :                     BTRFS_BLOCK_GROUP_RAID6))
    3869           0 :                 num_dev = root->fs_info->fs_devices->rw_devices;
    3870          87 :         else if (type & BTRFS_BLOCK_GROUP_RAID1)
    3871             :                 num_dev = 2;
    3872             :         else
    3873             :                 num_dev = 1;    /* DUP or single */
    3874             : 
    3875             :         /* metadata for updating devices and chunk tree */
    3876          87 :         return btrfs_calc_trans_metadata_size(root, num_dev + 1);
    3877             : }
    3878             : 
    3879          87 : static void check_system_chunk(struct btrfs_trans_handle *trans,
    3880             :                                struct btrfs_root *root, u64 type)
    3881             : {
    3882             :         struct btrfs_space_info *info;
    3883             :         u64 left;
    3884             :         u64 thresh;
    3885             : 
    3886          87 :         info = __find_space_info(root->fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
    3887             :         spin_lock(&info->lock);
    3888         261 :         left = info->total_bytes - info->bytes_used - info->bytes_pinned -
    3889         174 :                 info->bytes_reserved - info->bytes_readonly;
    3890             :         spin_unlock(&info->lock);
    3891             : 
    3892             :         thresh = get_system_chunk_thresh(root, type);
    3893          87 :         if (left < thresh && btrfs_test_opt(root, ENOSPC_DEBUG)) {
    3894           0 :                 btrfs_info(root->fs_info, "left=%llu, need=%llu, flags=%llu",
    3895             :                         left, thresh, type);
    3896           0 :                 dump_space_info(info, 0, 0);
    3897             :         }
    3898             : 
    3899          87 :         if (left < thresh) {
    3900             :                 u64 flags;
    3901             : 
    3902           0 :                 flags = btrfs_get_alloc_profile(root->fs_info->chunk_root, 0);
    3903           0 :                 btrfs_alloc_chunk(trans, root, flags);
    3904             :         }
    3905          87 : }
    3906             : 
    3907          87 : static int do_chunk_alloc(struct btrfs_trans_handle *trans,
    3908          87 :                           struct btrfs_root *extent_root, u64 flags, int force)
    3909             : {
    3910             :         struct btrfs_space_info *space_info;
    3911          87 :         struct btrfs_fs_info *fs_info = extent_root->fs_info;
    3912             :         int wait_for_alloc = 0;
    3913             :         int ret = 0;
    3914             : 
    3915             :         /* Don't re-enter if we're already allocating a chunk */
    3916          87 :         if (trans->allocating_chunk)
    3917             :                 return -ENOSPC;
    3918             : 
    3919          87 :         space_info = __find_space_info(extent_root->fs_info, flags);
    3920          87 :         if (!space_info) {
    3921           0 :                 ret = update_space_info(extent_root->fs_info, flags,
    3922             :                                         0, 0, &space_info);
    3923           0 :                 BUG_ON(ret); /* -ENOMEM */
    3924             :         }
    3925          87 :         BUG_ON(!space_info); /* Logic error */
    3926             : 
    3927             : again:
    3928          87 :         spin_lock(&space_info->lock);
    3929          87 :         if (force < space_info->force_alloc)
    3930          39 :                 force = space_info->force_alloc;
    3931          87 :         if (space_info->full) {
    3932           0 :                 if (should_alloc_chunk(extent_root, space_info, force))
    3933             :                         ret = -ENOSPC;
    3934             :                 else
    3935             :                         ret = 0;
    3936             :                 spin_unlock(&space_info->lock);
    3937           0 :                 return ret;
    3938             :         }
    3939             : 
    3940          87 :         if (!should_alloc_chunk(extent_root, space_info, force)) {
    3941             :                 spin_unlock(&space_info->lock);
    3942           0 :                 return 0;
    3943          87 :         } else if (space_info->chunk_alloc) {
    3944             :                 wait_for_alloc = 1;
    3945             :         } else {
    3946          87 :                 space_info->chunk_alloc = 1;
    3947             :         }
    3948             : 
    3949             :         spin_unlock(&space_info->lock);
    3950             : 
    3951          87 :         mutex_lock(&fs_info->chunk_mutex);
    3952             : 
    3953             :         /*
    3954             :          * The chunk_mutex is held throughout the entirety of a chunk
    3955             :          * allocation, so once we've acquired the chunk_mutex we know that the
    3956             :          * other guy is done and we need to recheck and see if we should
    3957             :          * allocate.
    3958             :          */
    3959          87 :         if (wait_for_alloc) {
    3960           0 :                 mutex_unlock(&fs_info->chunk_mutex);
    3961             :                 wait_for_alloc = 0;
    3962           0 :                 goto again;
    3963             :         }
    3964             : 
    3965          87 :         trans->allocating_chunk = true;
    3966             : 
    3967             :         /*
    3968             :          * If we have mixed data/metadata chunks we want to make sure we keep
    3969             :          * allocating mixed chunks instead of individual chunks.
    3970             :          */
    3971         174 :         if (btrfs_mixed_space_info(space_info))
    3972           4 :                 flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA);
    3973             : 
    3974             :         /*
    3975             :          * if we're doing a data chunk, go ahead and make sure that
    3976             :          * we keep a reasonable number of metadata chunks allocated in the
    3977             :          * FS as well.
    3978             :          */
    3979          87 :         if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) {
    3980           0 :                 fs_info->data_chunk_allocations++;
    3981           0 :                 if (!(fs_info->data_chunk_allocations %
    3982             :                       fs_info->metadata_ratio))
    3983             :                         force_metadata_allocation(fs_info);
    3984             :         }
    3985             : 
    3986             :         /*
    3987             :          * Check if we have enough space in SYSTEM chunk because we may need
    3988             :          * to update devices.
    3989             :          */
    3990          87 :         check_system_chunk(trans, extent_root, flags);
    3991             : 
    3992          87 :         ret = btrfs_alloc_chunk(trans, extent_root, flags);
    3993          87 :         trans->allocating_chunk = false;
    3994             : 
    3995          87 :         spin_lock(&space_info->lock);
    3996          87 :         if (ret < 0 && ret != -ENOSPC)
    3997             :                 goto out;
    3998          87 :         if (ret)
    3999           0 :                 space_info->full = 1;
    4000             :         else
    4001             :                 ret = 1;
    4002             : 
    4003          87 :         space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
    4004             : out:
    4005          87 :         space_info->chunk_alloc = 0;
    4006             :         spin_unlock(&space_info->lock);
    4007          87 :         mutex_unlock(&fs_info->chunk_mutex);
    4008          87 :         return ret;
    4009             : }
    4010             : 
    4011        5304 : static int can_overcommit(struct btrfs_root *root,
    4012             :                           struct btrfs_space_info *space_info, u64 bytes,
    4013             :                           enum btrfs_reserve_flush_enum flush)
    4014             : {
    4015       10608 :         struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
    4016        5304 :         u64 profile = btrfs_get_alloc_profile(root, 0);
    4017             :         u64 space_size;
    4018             :         u64 avail;
    4019             :         u64 used;
    4020             : 
    4021       15912 :         used = space_info->bytes_used + space_info->bytes_reserved +
    4022       10608 :                 space_info->bytes_pinned + space_info->bytes_readonly;
    4023             : 
    4024             :         /*
    4025             :          * We only want to allow over committing if we have lots of actual space
    4026             :          * free, but if we don't have enough space to handle the global reserve
    4027             :          * space then we could end up having a real enospc problem when trying
    4028             :          * to allocate a chunk or some other such important allocation.
    4029             :          */
    4030             :         spin_lock(&global_rsv->lock);
    4031             :         space_size = calc_global_rsv_need_space(global_rsv);
    4032             :         spin_unlock(&global_rsv->lock);
    4033        5304 :         if (used + space_size >= space_info->total_bytes)
    4034             :                 return 0;
    4035             : 
    4036        4916 :         used += space_info->bytes_may_use;
    4037             : 
    4038        4916 :         spin_lock(&root->fs_info->free_chunk_lock);
    4039        4916 :         avail = root->fs_info->free_chunk_space;
    4040             :         spin_unlock(&root->fs_info->free_chunk_lock);
    4041             : 
    4042             :         /*
    4043             :          * If we have dup, raid1 or raid10 then only half of the free
    4044             :          * space is actually useable.  For raid56, the space info used
    4045             :          * doesn't include the parity drive, so we don't have to
    4046             :          * change the math
    4047             :          */
    4048        4916 :         if (profile & (BTRFS_BLOCK_GROUP_DUP |
    4049             :                        BTRFS_BLOCK_GROUP_RAID1 |
    4050             :                        BTRFS_BLOCK_GROUP_RAID10))
    4051        1543 :                 avail >>= 1;
    4052             : 
    4053             :         /*
    4054             :          * If we aren't flushing all things, let us overcommit up to
    4055             :          * 1/2 of the space. If we can flush, don't let us overcommit
    4056             :          * too much, let it overcommit up to 1/8 of the space.
    4057             :          */
    4058        4916 :         if (flush == BTRFS_RESERVE_FLUSH_ALL)
    4059        4916 :                 avail >>= 3;
    4060             :         else
    4061           0 :                 avail >>= 1;
    4062             : 
    4063        4916 :         if (used + bytes < space_info->total_bytes + avail)
    4064             :                 return 1;
    4065           0 :         return 0;
    4066             : }
    4067             : 
    4068          50 : static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
    4069             :                                          unsigned long nr_pages, int nr_items)
    4070             : {
    4071          50 :         struct super_block *sb = root->fs_info->sb;
    4072             : 
    4073          50 :         if (down_read_trylock(&sb->s_umount)) {
    4074          50 :                 writeback_inodes_sb_nr(sb, nr_pages, WB_REASON_FS_FREE_SPACE);
    4075          50 :                 up_read(&sb->s_umount);
    4076             :         } else {
    4077             :                 /*
    4078             :                  * We needn't worry about the filesystem going from r/w to r/o
    4079             :                  * even though we don't acquire the ->s_umount lock, because the
    4080             :                  * filesystem should guarantee that the delalloc inodes list is
    4081             :                  * empty once the filesystem is readonly (all dirty pages are
    4082             :                  * written to the disk).
    4083             :                  */
    4084           0 :                 btrfs_start_delalloc_roots(root->fs_info, 0, nr_items);
    4085           0 :                 if (!current->journal_info)
    4086           0 :                         btrfs_wait_ordered_roots(root->fs_info, nr_items);
    4087             :         }
    4088          50 : }
    4089             : 
    4090         232 : static inline int calc_reclaim_items_nr(struct btrfs_root *root, u64 to_reclaim)
    4091             : {
    4092             :         u64 bytes;
    4093             :         int nr;
    4094             : 
    4095             :         bytes = btrfs_calc_trans_metadata_size(root, 1);
    4096         232 :         nr = (int)div64_u64(to_reclaim, bytes);
    4097         232 :         if (!nr)
    4098             :                 nr = 1;
    4099             :         return nr;
    4100             : }
    4101             : 
    4102             : #define EXTENT_SIZE_PER_ITEM    (256 * 1024)
    4103             : 
    4104             : /*
    4105             :  * shrink metadata reservation for delalloc
    4106             :  */
    4107          54 : static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
    4108             :                             bool wait_ordered)
    4109             : {
    4110             :         struct btrfs_block_rsv *block_rsv;
    4111             :         struct btrfs_space_info *space_info;
    4112             :         struct btrfs_trans_handle *trans;
    4113             :         u64 delalloc_bytes;
    4114             :         u64 max_reclaim;
    4115             :         long time_left;
    4116             :         unsigned long nr_pages;
    4117             :         int loops;
    4118             :         int items;
    4119             :         enum btrfs_reserve_flush_enum flush;
    4120             : 
    4121             :         /* Calc the number of pages we need to flush for space reservation */
    4122             :         items = calc_reclaim_items_nr(root, to_reclaim);
    4123          54 :         to_reclaim = items * EXTENT_SIZE_PER_ITEM;
    4124             : 
    4125          54 :         trans = (struct btrfs_trans_handle *)current->journal_info;
    4126          54 :         block_rsv = &root->fs_info->delalloc_block_rsv;
    4127          54 :         space_info = block_rsv->space_info;
    4128             : 
    4129         108 :         delalloc_bytes = percpu_counter_sum_positive(
    4130             :                                                 &root->fs_info->delalloc_bytes);
    4131          54 :         if (delalloc_bytes == 0) {
    4132           8 :                 if (trans)
    4133             :                         return;
    4134           8 :                 if (wait_ordered)
    4135           5 :                         btrfs_wait_ordered_roots(root->fs_info, items);
    4136             :                 return;
    4137             :         }
    4138             : 
    4139             :         loops = 0;
    4140          96 :         while (delalloc_bytes && loops < 3) {
    4141          50 :                 max_reclaim = min(delalloc_bytes, to_reclaim);
    4142          50 :                 nr_pages = max_reclaim >> PAGE_CACHE_SHIFT;
    4143          50 :                 btrfs_writeback_inodes_sb_nr(root, nr_pages, items);
    4144             :                 /*
    4145             :                  * We need to wait for the async pages to actually start before
    4146             :                  * we do anything.
    4147             :                  */
    4148         100 :                 max_reclaim = atomic_read(&root->fs_info->async_delalloc_pages);
    4149          50 :                 if (!max_reclaim)
    4150             :                         goto skip_async;
    4151             : 
    4152           0 :                 if (max_reclaim <= nr_pages)
    4153             :                         max_reclaim = 0;
    4154             :                 else
    4155           0 :                         max_reclaim -= nr_pages;
    4156             : 
    4157           0 :                 wait_event(root->fs_info->async_submit_wait,
    4158             :                            atomic_read(&root->fs_info->async_delalloc_pages) <=
    4159             :                            (int)max_reclaim);
    4160             : skip_async:
    4161          50 :                 if (!trans)
    4162             :                         flush = BTRFS_RESERVE_FLUSH_ALL;
    4163             :                 else
    4164             :                         flush = BTRFS_RESERVE_NO_FLUSH;
    4165             :                 spin_lock(&space_info->lock);
    4166          50 :                 if (can_overcommit(root, space_info, orig, flush)) {
    4167             :                         spin_unlock(&space_info->lock);
    4168             :                         break;
    4169             :                 }
    4170             :                 spin_unlock(&space_info->lock);
    4171             : 
    4172          50 :                 loops++;
    4173          50 :                 if (wait_ordered && !trans) {
    4174           0 :                         btrfs_wait_ordered_roots(root->fs_info, items);
    4175             :                 } else {
    4176          50 :                         time_left = schedule_timeout_killable(1);
    4177          50 :                         if (time_left)
    4178             :                                 break;
    4179             :                 }
    4180         100 :                 delalloc_bytes = percpu_counter_sum_positive(
    4181          50 :                                                 &root->fs_info->delalloc_bytes);
    4182             :         }
    4183             : }
    4184             : 
    4185             : /**
    4186             :  * may_commit_transaction - possibly commit the transaction if it's ok to
    4187             :  * @root - the root we're allocating for
    4188             :  * @bytes - the number of bytes we want to reserve
    4189             :  * @force - force the commit
    4190             :  *
    4191             :  * This will check to make sure that committing the transaction will actually
    4192             :  * get us somewhere and then commit the transaction if it does.  Otherwise it
    4193             :  * will return -ENOSPC.
    4194             :  */
    4195           0 : static int may_commit_transaction(struct btrfs_root *root,
    4196             :                                   struct btrfs_space_info *space_info,
    4197             :                                   u64 bytes, int force)
    4198             : {
    4199           0 :         struct btrfs_block_rsv *delayed_rsv = &root->fs_info->delayed_block_rsv;
    4200             :         struct btrfs_trans_handle *trans;
    4201             : 
    4202           0 :         trans = (struct btrfs_trans_handle *)current->journal_info;
    4203           0 :         if (trans)
    4204             :                 return -EAGAIN;
    4205             : 
    4206           0 :         if (force)
    4207             :                 goto commit;
    4208             : 
    4209             :         /* See if there is enough pinned space to make this reservation */
    4210           0 :         if (percpu_counter_compare(&space_info->total_bytes_pinned,
    4211             :                                    bytes) >= 0)
    4212             :                 goto commit;
    4213             : 
    4214             :         /*
    4215             :          * See if there is some space in the delayed insertion reservation for
    4216             :          * this reservation.
    4217             :          */
    4218           0 :         if (space_info != delayed_rsv->space_info)
    4219             :                 return -ENOSPC;
    4220             : 
    4221             :         spin_lock(&delayed_rsv->lock);
    4222           0 :         if (percpu_counter_compare(&space_info->total_bytes_pinned,
    4223           0 :                                    bytes - delayed_rsv->size) >= 0) {
    4224             :                 spin_unlock(&delayed_rsv->lock);
    4225           0 :                 return -ENOSPC;
    4226             :         }
    4227             :         spin_unlock(&delayed_rsv->lock);
    4228             : 
    4229             : commit:
    4230           0 :         trans = btrfs_join_transaction(root);
    4231           0 :         if (IS_ERR(trans))
    4232             :                 return -ENOSPC;
    4233             : 
    4234           0 :         return btrfs_commit_transaction(trans, root);
    4235             : }
    4236             : 
    4237             : enum flush_state {
    4238             :         FLUSH_DELAYED_ITEMS_NR  =       1,
    4239             :         FLUSH_DELAYED_ITEMS     =       2,
    4240             :         FLUSH_DELALLOC          =       3,
    4241             :         FLUSH_DELALLOC_WAIT     =       4,
    4242             :         ALLOC_CHUNK             =       5,
    4243             :         COMMIT_TRANS            =       6,
    4244             : };
    4245             : 
    4246         284 : static int flush_space(struct btrfs_root *root,
    4247             :                        struct btrfs_space_info *space_info, u64 num_bytes,
    4248             :                        u64 orig_bytes, int state)
    4249             : {
    4250             :         struct btrfs_trans_handle *trans;
    4251             :         int nr;
    4252             :         int ret = 0;
    4253             : 
    4254         284 :         switch (state) {
    4255             :         case FLUSH_DELAYED_ITEMS_NR:
    4256             :         case FLUSH_DELAYED_ITEMS:
    4257         227 :                 if (state == FLUSH_DELAYED_ITEMS_NR)
    4258         178 :                         nr = calc_reclaim_items_nr(root, num_bytes) * 2;
    4259             :                 else
    4260             :                         nr = -1;
    4261             : 
    4262         227 :                 trans = btrfs_join_transaction(root);
    4263         227 :                 if (IS_ERR(trans)) {
    4264           0 :                         ret = PTR_ERR(trans);
    4265           0 :                         break;
    4266             :                 }
    4267         227 :                 ret = btrfs_run_delayed_items_nr(trans, root, nr);
    4268         227 :                 btrfs_end_transaction(trans, root);
    4269         227 :                 break;
    4270             :         case FLUSH_DELALLOC:
    4271             :         case FLUSH_DELALLOC_WAIT:
    4272          54 :                 shrink_delalloc(root, num_bytes * 2, orig_bytes,
    4273             :                                 state == FLUSH_DELALLOC_WAIT);
    4274          54 :                 break;
    4275             :         case ALLOC_CHUNK:
    4276           3 :                 trans = btrfs_join_transaction(root);
    4277           3 :                 if (IS_ERR(trans)) {
    4278           0 :                         ret = PTR_ERR(trans);
    4279           0 :                         break;
    4280             :                 }
    4281           3 :                 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
    4282             :                                      btrfs_get_alloc_profile(root, 0),
    4283             :                                      CHUNK_ALLOC_NO_FORCE);
    4284           3 :                 btrfs_end_transaction(trans, root);
    4285           3 :                 if (ret == -ENOSPC)
    4286             :                         ret = 0;
    4287             :                 break;
    4288             :         case COMMIT_TRANS:
    4289           0 :                 ret = may_commit_transaction(root, space_info, orig_bytes, 0);
    4290           0 :                 break;
    4291             :         default:
    4292             :                 ret = -ENOSPC;
    4293             :                 break;
    4294             :         }
    4295             : 
    4296         284 :         return ret;
    4297             : }
    4298             : 
    4299             : static inline u64
    4300        2332 : btrfs_calc_reclaim_metadata_size(struct btrfs_root *root,
    4301             :                                  struct btrfs_space_info *space_info)
    4302             : {
    4303             :         u64 used;
    4304             :         u64 expected;
    4305             :         u64 to_reclaim;
    4306             : 
    4307        4664 :         to_reclaim = min_t(u64, num_online_cpus() * 1024 * 1024,
    4308             :                                 16 * 1024 * 1024);
    4309             :         spin_lock(&space_info->lock);
    4310        2332 :         if (can_overcommit(root, space_info, to_reclaim,
    4311             :                            BTRFS_RESERVE_FLUSH_ALL)) {
    4312             :                 to_reclaim = 0;
    4313             :                 goto out;
    4314             :         }
    4315             : 
    4316         306 :         used = space_info->bytes_used + space_info->bytes_reserved +
    4317         204 :                space_info->bytes_pinned + space_info->bytes_readonly +
    4318         102 :                space_info->bytes_may_use;
    4319         102 :         if (can_overcommit(root, space_info, 1024 * 1024,
    4320             :                            BTRFS_RESERVE_FLUSH_ALL))
    4321           0 :                 expected = div_factor_fine(space_info->total_bytes, 95);
    4322             :         else
    4323         102 :                 expected = div_factor_fine(space_info->total_bytes, 90);
    4324             : 
    4325         102 :         if (used > expected)
    4326         102 :                 to_reclaim = used - expected;
    4327             :         else
    4328             :                 to_reclaim = 0;
    4329         102 :         to_reclaim = min(to_reclaim, space_info->bytes_may_use +
    4330             :                                      space_info->bytes_reserved);
    4331             : out:
    4332             :         spin_unlock(&space_info->lock);
    4333             : 
    4334        2332 :         return to_reclaim;
    4335             : }
    4336             : 
    4337             : static inline int need_do_async_reclaim(struct btrfs_space_info *space_info,
    4338             :                                         struct btrfs_fs_info *fs_info, u64 used)
    4339             : {
    4340        3152 :         return (used >= div_factor_fine(space_info->total_bytes, 98) &&
    4341      225853 :                 !btrfs_fs_closing(fs_info) &&
    4342             :                 !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
    4343             : }
    4344             : 
    4345         300 : static int btrfs_need_do_async_reclaim(struct btrfs_space_info *space_info,
    4346             :                                        struct btrfs_fs_info *fs_info)
    4347             : {
    4348             :         u64 used;
    4349             : 
    4350             :         spin_lock(&space_info->lock);
    4351         450 :         used = space_info->bytes_used + space_info->bytes_reserved +
    4352         300 :                space_info->bytes_pinned + space_info->bytes_readonly +
    4353         150 :                space_info->bytes_may_use;
    4354         150 :         if (need_do_async_reclaim(space_info, fs_info, used)) {
    4355             :                 spin_unlock(&space_info->lock);
    4356          48 :                 return 1;
    4357             :         }
    4358             :         spin_unlock(&space_info->lock);
    4359             : 
    4360         102 :         return 0;
    4361             : }
    4362             : 
    4363        2332 : static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
    4364             : {
    4365             :         struct btrfs_fs_info *fs_info;
    4366             :         struct btrfs_space_info *space_info;
    4367             :         u64 to_reclaim;
    4368             :         int flush_state;
    4369             : 
    4370        2332 :         fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
    4371             :         space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
    4372             : 
    4373        2332 :         to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
    4374             :                                                       space_info);
    4375        2332 :         if (!to_reclaim)
    4376             :                 return;
    4377             : 
    4378             :         flush_state = FLUSH_DELAYED_ITEMS_NR;
    4379             :         do {
    4380         150 :                 flush_space(fs_info->fs_root, space_info, to_reclaim,
    4381             :                             to_reclaim, flush_state);
    4382         150 :                 flush_state++;
    4383         150 :                 if (!btrfs_need_do_async_reclaim(space_info, fs_info))
    4384             :                         return;
    4385          48 :         } while (flush_state <= COMMIT_TRANS);
    4386             : 
    4387           0 :         if (btrfs_need_do_async_reclaim(space_info, fs_info))
    4388           0 :                 queue_work(system_unbound_wq, work);
    4389             : }
    4390             : 
    4391         221 : void btrfs_init_async_reclaim_work(struct work_struct *work)
    4392             : {
    4393         442 :         INIT_WORK(work, btrfs_async_reclaim_metadata_space);
    4394         221 : }
    4395             : 
    4396             : /**
    4397             :  * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
    4398             :  * @root - the root we're allocating for
    4399             :  * @block_rsv - the block_rsv we're allocating for
    4400             :  * @orig_bytes - the number of bytes we want
    4401             :  * @flush - whether or not we can flush to make our reservation
    4402             :  *
    4403             :  * This will reserve orig_bytes number of bytes from the space info associated
    4404             :  * with the block_rsv.  If there is not enough space it will make an attempt to
    4405             :  * flush out space to make room.  It will do this by flushing delalloc if
    4406             :  * possible or committing the transaction.  If flush is 0 then no attempts to
    4407             :  * regain reservations will be made and this will fail if there is not enough
    4408             :  * space already.
    4409             :  */
    4410      222682 : static int reserve_metadata_bytes(struct btrfs_root *root,
    4411             :                                   struct btrfs_block_rsv *block_rsv,
    4412             :                                   u64 orig_bytes,
    4413             :                                   enum btrfs_reserve_flush_enum flush)
    4414             : {
    4415      445233 :         struct btrfs_space_info *space_info = block_rsv->space_info;
    4416             :         u64 used;
    4417             :         u64 num_bytes = orig_bytes;
    4418             :         int flush_state = FLUSH_DELAYED_ITEMS_NR;
    4419             :         int ret = 0;
    4420             :         bool flushing = false;
    4421             : 
    4422             : again:
    4423             :         ret = 0;
    4424             :         spin_lock(&space_info->lock);
    4425             :         /*
    4426             :          * We only want to wait if somebody other than us is flushing and we
    4427             :          * are actually allowed to flush all things.
    4428             :          */
    4429      222823 :         while (flush == BTRFS_RESERVE_FLUSH_ALL && !flushing &&
    4430             :                space_info->flush) {
    4431             :                 spin_unlock(&space_info->lock);
    4432             :                 /*
    4433             :                  * If we have a trans handle we can't wait because the flusher
    4434             :                  * may have to commit the transaction, which would mean we would
    4435             :                  * deadlock since we are waiting for the flusher to finish, but
    4436             :                  * hold the current transaction open.
    4437             :                  */
    4438           2 :                 if (current->journal_info)
    4439             :                         return -EAGAIN;
    4440           4 :                 ret = wait_event_killable(space_info->wait, !space_info->flush);
    4441             :                 /* Must have been killed, return */
    4442           2 :                 if (ret)
    4443             :                         return -EINTR;
    4444             : 
    4445             :                 spin_lock(&space_info->lock);
    4446             :         }
    4447             : 
    4448             :         ret = -ENOSPC;
    4449      668463 :         used = space_info->bytes_used + space_info->bytes_reserved +
    4450      445642 :                 space_info->bytes_pinned + space_info->bytes_readonly +
    4451      222821 :                 space_info->bytes_may_use;
    4452             : 
    4453             :         /*
    4454             :          * The idea here is that if we've not already over-reserved the block group
    4455             :          * then we can go ahead and save our reservation first and then start
    4456             :          * flushing if we need to.  Otherwise if we've already overcommitted
    4457             :          * let's start flushing stuff first and then come back and try to make
    4458             :          * our reservation.
    4459             :          */
    4460      222821 :         if (used <= space_info->total_bytes) {
    4461      220143 :                 if (used + orig_bytes <= space_info->total_bytes) {
    4462      220001 :                         space_info->bytes_may_use += orig_bytes;
    4463      220001 :                         trace_btrfs_space_reservation(root->fs_info,
    4464             :                                 "space_info", space_info->flags, orig_bytes, 1);
    4465             :                         ret = 0;
    4466             :                 } else {
    4467             :                         /*
    4468             :                          * Ok set num_bytes to orig_bytes since we aren't
    4469             :                          * overcommitted, this way we only try and reclaim what
    4470             :                          * we need.
    4471             :                          */
    4472             :                         num_bytes = orig_bytes;
    4473             :                 }
    4474             :         } else {
    4475             :                 /*
    4476             :                  * Ok we're over committed, set num_bytes to the overcommitted
    4477             :                  * amount plus the amount of bytes that we need for this
    4478             :                  * reservation.
    4479             :                  */
    4480        5356 :                 num_bytes = used - space_info->total_bytes +
    4481        2678 :                         (orig_bytes * 2);
    4482             :         }
    4483             : 
    4484      222820 :         if (ret && can_overcommit(root, space_info, orig_bytes, flush)) {
    4485        2686 :                 space_info->bytes_may_use += orig_bytes;
    4486        2686 :                 trace_btrfs_space_reservation(root->fs_info, "space_info",
    4487             :                                               space_info->flags, orig_bytes,
    4488             :                                               1);
    4489             :                 ret = 0;
    4490             :         }
    4491             : 
    4492             :         /*
    4493             :          * Couldn't make our reservation, save our place so while we're trying
    4494             :          * to reclaim space we can actually use it instead of somebody else
    4495             :          * stealing it from us.
    4496             :          *
    4497             :          * We make the other tasks wait for the flush only when we can flush
    4498             :          * all things.
    4499             :          */
    4500      222820 :         if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
    4501             :                 flushing = true;
    4502         134 :                 space_info->flush = 1;
    4503      222686 :         } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
    4504      222551 :                 used += orig_bytes;
    4505      448206 :                 if (need_do_async_reclaim(space_info, root->fs_info, used) &&
    4506        3104 :                     !work_busy(&root->fs_info->async_reclaim_work))
    4507        2332 :                         queue_work(system_unbound_wq,
    4508        2332 :                                    &root->fs_info->async_reclaim_work);
    4509             :         }
    4510             :         spin_unlock(&space_info->lock);
    4511             : 
    4512      222820 :         if (!ret || flush == BTRFS_RESERVE_NO_FLUSH)
    4513             :                 goto out;
    4514             : 
    4515         134 :         ret = flush_space(root, space_info, num_bytes, orig_bytes,
    4516             :                           flush_state);
    4517         134 :         flush_state++;
    4518             : 
    4519             :         /*
    4520             :          * If we are FLUSH_LIMIT, we cannot flush delalloc, or a deadlock
    4521             :          * would happen. So skip the delalloc flush.
    4522             :          */
    4523         134 :         if (flush == BTRFS_RESERVE_FLUSH_LIMIT &&
    4524           0 :             (flush_state == FLUSH_DELALLOC ||
    4525             :              flush_state == FLUSH_DELALLOC_WAIT))
    4526             :                 flush_state = ALLOC_CHUNK;
    4527             : 
    4528         134 :         if (!ret)
    4529             :                 goto again;
    4530           2 :         else if (flush == BTRFS_RESERVE_FLUSH_LIMIT &&
    4531           1 :                  flush_state < COMMIT_TRANS)
    4532             :                 goto again;
    4533           1 :         else if (flush == BTRFS_RESERVE_FLUSH_ALL &&
    4534           1 :                  flush_state <= COMMIT_TRANS)
    4535             :                 goto again;
    4536             : 
    4537             : out:
    4538      222686 :         if (ret == -ENOSPC &&
    4539           0 :             unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) {
    4540           0 :                 struct btrfs_block_rsv *global_rsv =
    4541           0 :                         &root->fs_info->global_block_rsv;
    4542             : 
    4543           0 :                 if (block_rsv != global_rsv &&
    4544           0 :                     !block_rsv_use_bytes(global_rsv, orig_bytes))
    4545             :                         ret = 0;
    4546             :         }
    4547      222686 :         if (ret == -ENOSPC)
    4548           0 :                 trace_btrfs_space_reservation(root->fs_info,
    4549             :                                               "space_info:enospc",
    4550             :                                               space_info->flags, orig_bytes, 1);
    4551      222683 :         if (flushing) {
    4552             :                 spin_lock(&space_info->lock);
    4553          76 :                 space_info->flush = 0;
    4554          76 :                 wake_up_all(&space_info->wait);
    4555             :                 spin_unlock(&space_info->lock);
    4556             :         }
    4557      222683 :         return ret;
    4558             : }
    4559             : 
    4560       68514 : static struct btrfs_block_rsv *get_block_rsv(
    4561             :                                         const struct btrfs_trans_handle *trans,
    4562             :                                         const struct btrfs_root *root)
    4563             : {
    4564             :         struct btrfs_block_rsv *block_rsv = NULL;
    4565             : 
    4566       68514 :         if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
    4567       37567 :                 block_rsv = trans->block_rsv;
    4568             : 
    4569       68514 :         if (root == root->fs_info->csum_root && trans->adding_csums)
    4570        4320 :                 block_rsv = trans->block_rsv;
    4571             : 
    4572       68514 :         if (root == root->fs_info->uuid_root)
    4573         263 :                 block_rsv = trans->block_rsv;
    4574             : 
    4575       68514 :         if (!block_rsv)
    4576       26387 :                 block_rsv = root->block_rsv;
    4577             : 
    4578       68514 :         if (!block_rsv)
    4579        3354 :                 block_rsv = &root->fs_info->empty_block_rsv;
    4580             : 
    4581       68514 :         return block_rsv;
    4582             : }
    4583             : 
    4584      160165 : static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
    4585             :                                u64 num_bytes)
    4586             : {
    4587             :         int ret = -ENOSPC;
    4588             :         spin_lock(&block_rsv->lock);
    4589      160184 :         if (block_rsv->reserved >= num_bytes) {
    4590      160171 :                 block_rsv->reserved -= num_bytes;
    4591      160171 :                 if (block_rsv->reserved < block_rsv->size)
    4592      160171 :                         block_rsv->full = 0;
    4593             :                 ret = 0;
    4594             :         }
    4595             :         spin_unlock(&block_rsv->lock);
    4596      160184 :         return ret;
    4597             : }
    4598             : 
    4599      323967 : static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
    4600             :                                 u64 num_bytes, int update_size)
    4601             : {
    4602             :         spin_lock(&block_rsv->lock);
    4603      323970 :         block_rsv->reserved += num_bytes;
    4604      323970 :         if (update_size)
    4605      316106 :                 block_rsv->size += num_bytes;
    4606        7864 :         else if (block_rsv->reserved >= block_rsv->size)
    4607        7864 :                 block_rsv->full = 1;
    4608             :         spin_unlock(&block_rsv->lock);
    4609      323970 : }
    4610             : 
    4611           0 : int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
    4612             :                              struct btrfs_block_rsv *dest, u64 num_bytes,
    4613             :                              int min_factor)
    4614             : {
    4615             :         struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
    4616             :         u64 min_bytes;
    4617             : 
    4618           0 :         if (global_rsv->space_info != dest->space_info)
    4619             :                 return -ENOSPC;
    4620             : 
    4621             :         spin_lock(&global_rsv->lock);
    4622           0 :         min_bytes = div_factor(global_rsv->size, min_factor);
    4623           0 :         if (global_rsv->reserved < min_bytes + num_bytes) {
    4624             :                 spin_unlock(&global_rsv->lock);
    4625           0 :                 return -ENOSPC;
    4626             :         }
    4627           0 :         global_rsv->reserved -= num_bytes;
    4628           0 :         if (global_rsv->reserved < global_rsv->size)
    4629           0 :                 global_rsv->full = 0;
    4630             :         spin_unlock(&global_rsv->lock);
    4631             : 
    4632           0 :         block_rsv_add_bytes(dest, num_bytes, 1);
    4633           0 :         return 0;
    4634             : }
    4635             : 
    4636      293261 : static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
    4637             :                                     struct btrfs_block_rsv *block_rsv,
    4638             :                                     struct btrfs_block_rsv *dest, u64 num_bytes)
    4639             : {
    4640      293261 :         struct btrfs_space_info *space_info = block_rsv->space_info;
    4641             : 
    4642             :         spin_lock(&block_rsv->lock);
    4643      293302 :         if (num_bytes == (u64)-1)
    4644       11065 :                 num_bytes = block_rsv->size;
    4645      293302 :         block_rsv->size -= num_bytes;
    4646      293302 :         if (block_rsv->reserved >= block_rsv->size) {
    4647      289940 :                 num_bytes = block_rsv->reserved - block_rsv->size;
    4648      289940 :                 block_rsv->reserved = block_rsv->size;
    4649      289940 :                 block_rsv->full = 1;
    4650             :         } else {
    4651             :                 num_bytes = 0;
    4652             :         }
    4653             :         spin_unlock(&block_rsv->lock);
    4654             : 
    4655      293300 :         if (num_bytes > 0) {
    4656      232254 :                 if (dest) {
    4657             :                         spin_lock(&dest->lock);
    4658      232037 :                         if (!dest->full) {
    4659             :                                 u64 bytes_to_add;
    4660             : 
    4661        2776 :                                 bytes_to_add = dest->size - dest->reserved;
    4662        2776 :                                 bytes_to_add = min(num_bytes, bytes_to_add);
    4663        2776 :                                 dest->reserved += bytes_to_add;
    4664        2776 :                                 if (dest->reserved >= dest->size)
    4665        2366 :                                         dest->full = 1;
    4666        2776 :                                 num_bytes -= bytes_to_add;
    4667             :                         }
    4668             :                         spin_unlock(&dest->lock);
    4669             :                 }
    4670      232255 :                 if (num_bytes) {
    4671             :                         spin_lock(&space_info->lock);
    4672      231837 :                         space_info->bytes_may_use -= num_bytes;
    4673      231837 :                         trace_btrfs_space_reservation(fs_info, "space_info",
    4674             :                                         space_info->flags, num_bytes, 0);
    4675             :                         spin_unlock(&space_info->lock);
    4676             :                 }
    4677             :         }
    4678      293304 : }
    4679             : 
    4680      104781 : static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src,
    4681             :                                    struct btrfs_block_rsv *dst, u64 num_bytes)
    4682             : {
    4683             :         int ret;
    4684             : 
    4685      104781 :         ret = block_rsv_use_bytes(src, num_bytes);
    4686      104787 :         if (ret)
    4687             :                 return ret;
    4688             : 
    4689      104787 :         block_rsv_add_bytes(dst, num_bytes, 1);
    4690      104786 :         return 0;
    4691             : }
    4692             : 
    4693        1554 : void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type)
    4694             : {
    4695       11929 :         memset(rsv, 0, sizeof(*rsv));
    4696       11929 :         spin_lock_init(&rsv->lock);
    4697       11929 :         rsv->type = type;
    4698        1554 : }
    4699             : 
    4700       10375 : struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root,
    4701             :                                               unsigned short type)
    4702             : {
    4703             :         struct btrfs_block_rsv *block_rsv;
    4704       10375 :         struct btrfs_fs_info *fs_info = root->fs_info;
    4705             : 
    4706             :         block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS);
    4707       10375 :         if (!block_rsv)
    4708             :                 return NULL;
    4709             : 
    4710             :         btrfs_init_block_rsv(block_rsv, type);
    4711       10375 :         block_rsv->space_info = __find_space_info(fs_info,
    4712             :                                                   BTRFS_BLOCK_GROUP_METADATA);
    4713       10375 :         return block_rsv;
    4714             : }
    4715             : 
    4716       11282 : void btrfs_free_block_rsv(struct btrfs_root *root,
    4717             :                           struct btrfs_block_rsv *rsv)
    4718             : {
    4719       11282 :         if (!rsv)
    4720       11282 :                 return;
    4721       10376 :         btrfs_block_rsv_release(root, rsv, (u64)-1);
    4722       10376 :         kfree(rsv);
    4723             : }
    4724             : 
    4725       56987 : int btrfs_block_rsv_add(struct btrfs_root *root,
    4726             :                         struct btrfs_block_rsv *block_rsv, u64 num_bytes,
    4727             :                         enum btrfs_reserve_flush_enum flush)
    4728             : {
    4729             :         int ret;
    4730             : 
    4731       56987 :         if (num_bytes == 0)
    4732             :                 return 0;
    4733             : 
    4734       56987 :         ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
    4735       56988 :         if (!ret) {
    4736       56988 :                 block_rsv_add_bytes(block_rsv, num_bytes, 1);
    4737       56988 :                 return 0;
    4738             :         }
    4739             : 
    4740             :         return ret;
    4741             : }
    4742             : 
    4743      174833 : int btrfs_block_rsv_check(struct btrfs_root *root,
    4744             :                           struct btrfs_block_rsv *block_rsv, int min_factor)
    4745             : {
    4746             :         u64 num_bytes = 0;
    4747             :         int ret = -ENOSPC;
    4748             : 
    4749      174833 :         if (!block_rsv)
    4750             :                 return 0;
    4751             : 
    4752             :         spin_lock(&block_rsv->lock);
    4753      174848 :         num_bytes = div_factor(block_rsv->size, min_factor);
    4754      174848 :         if (block_rsv->reserved >= num_bytes)
    4755             :                 ret = 0;
    4756             :         spin_unlock(&block_rsv->lock);
    4757             : 
    4758      174846 :         return ret;
    4759             : }
    4760             : /* Top up block_rsv so at least min_reserved bytes are reserved; 0 on success, else the reserve_metadata_bytes() error. */
    4761       10447 : int btrfs_block_rsv_refill(struct btrfs_root *root,
    4762             :                            struct btrfs_block_rsv *block_rsv, u64 min_reserved,
    4763             :                            enum btrfs_reserve_flush_enum flush)
    4764             : {
    4765             :         u64 num_bytes = 0;
    4766             :         int ret = -ENOSPC;
    4767             : 
    4768       10447 :         if (!block_rsv)
    4769             :                 return 0; /* no reservation to refill: treated as success */
    4770             : 
    4771             :         spin_lock(&block_rsv->lock);
    4772             :         num_bytes = min_reserved;
    4773       10447 :         if (block_rsv->reserved >= num_bytes)
    4774             :                 ret = 0;
    4775             :         else
    4776        7864 :                 num_bytes -= block_rsv->reserved; /* shortfall still to be reserved */
    4777             :         spin_unlock(&block_rsv->lock);
    4778             : 
    4779       10447 :         if (!ret)
    4780             :                 return 0; /* already holds at least min_reserved */
    4781             :         /* Reserve only the shortfall, after the rsv lock has been dropped. */
    4782        7864 :         ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
    4783        7864 :         if (!ret) {
    4784        7864 :                 block_rsv_add_bytes(block_rsv, num_bytes, 0);
    4785        7864 :                 return 0;
    4786             :         }
    4787             : 
    4788             :         return ret;
    4789             : }
    4790             : /* Thin wrapper: forwards to block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes) and returns its result. */
    4791       95152 : int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
    4792             :                             struct btrfs_block_rsv *dst_rsv,
    4793             :                             u64 num_bytes)
    4794             : {
    4795       95152 :         return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
    4796             : }
    4797             : /* Release num_bytes from block_rsv through block_rsv_release_bytes(). */
    4798      293036 : void btrfs_block_rsv_release(struct btrfs_root *root,
    4799             :                              struct btrfs_block_rsv *block_rsv,
    4800             :                              u64 num_bytes)
    4801             : {
    4802      293036 :         struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
    4803      586074 :         if (global_rsv == block_rsv ||
    4804      293038 :             block_rsv->space_info != global_rsv->space_info)
    4805             :                 global_rsv = NULL; /* the global rsv is only passed on when it is a different rsv on the same space_info */
    4806      293036 :         block_rsv_release_bytes(root->fs_info, block_rsv, global_rsv,
    4807             :                                 num_bytes);
    4808      293076 : }
    4809             : 
    4810             : /*
    4811             :  * Helper to calculate the size of the global block reservation.
    4812             :  * The desired value is the sum of space used by the extent tree,
    4813             :  * checksum tree and root tree.
    4814             :  */
    4815        2406 : static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
    4816             : {
    4817             :         struct btrfs_space_info *sinfo;
    4818             :         u64 num_bytes;
    4819             :         u64 meta_used;
    4820             :         u64 data_used;
    4821        2406 :         int csum_size = btrfs_super_csum_size(fs_info->super_copy);
    4822             :         /* Sample bytes_used of the DATA space_info under its lock. */
    4823             :         sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA);
    4824             :         spin_lock(&sinfo->lock);
    4825        2406 :         data_used = sinfo->bytes_used;
    4826             :         spin_unlock(&sinfo->lock);
    4827             : 
    4828             :         sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
    4829             :         spin_lock(&sinfo->lock);
    4830        2406 :         if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA)
    4831             :                 data_used = 0; /* presumably mixed data+metadata groups, where meta_used already covers data - confirm */
    4832        2406 :         meta_used = sinfo->bytes_used;
    4833             :         spin_unlock(&sinfo->lock);
    4834             :         /* Estimate: 2 * csum_size per data block, plus 1/50 of all used bytes. */
    4835        2406 :         num_bytes = (data_used >> fs_info->sb->s_blocksize_bits) *
    4836             :                     csum_size * 2;
    4837        4812 :         num_bytes += div64_u64(data_used + meta_used, 50);
    4838             :         /* Never ask for more than a third of the used metadata bytes. */
    4839        2406 :         if (num_bytes * 3 > meta_used)
    4840             :                 num_bytes = div64_u64(meta_used, 3);
    4841             :         /* Result is aligned to (leafsize << 10). */
    4842        2406 :         return ALIGN(num_bytes, fs_info->extent_root->leafsize << 10);
    4843             : }
    4844             : /* Recompute the global block rsv's target size and rebalance its reserved bytes against its space_info. */
    4845        2406 : static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
    4846             : {
    4847             :         struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
    4848        2406 :         struct btrfs_space_info *sinfo = block_rsv->space_info;
    4849             :         u64 num_bytes;
    4850             : 
    4851        2406 :         num_bytes = calc_global_metadata_size(fs_info);
    4852             :         /* Lock order used here: space_info lock first, then the rsv lock. */
    4853             :         spin_lock(&sinfo->lock);
    4854             :         spin_lock(&block_rsv->lock);
    4855             : 
    4856        2406 :         block_rsv->size = min_t(u64, num_bytes, 512 * 1024 * 1024); /* target size is capped at 512MiB */
    4857             :         /* num_bytes is reused: total bytes this space_info already accounts for. */
    4858        7218 :         num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
    4859        4812 :                     sinfo->bytes_reserved + sinfo->bytes_readonly +
    4860        2406 :                     sinfo->bytes_may_use;
    4861             :         /* Give every unaccounted byte to the global rsv, counting it as may_use. */
    4862        2406 :         if (sinfo->total_bytes > num_bytes) {
    4863        2403 :                 num_bytes = sinfo->total_bytes - num_bytes;
    4864        2403 :                 block_rsv->reserved += num_bytes;
    4865        2403 :                 sinfo->bytes_may_use += num_bytes;
    4866        2403 :                 trace_btrfs_space_reservation(fs_info, "space_info",
    4867             :                                       sinfo->flags, num_bytes, 1);
    4868             :         }
    4869             :         /* At or above target: return the excess to the space_info and mark the rsv full. */
    4870        2406 :         if (block_rsv->reserved >= block_rsv->size) {
    4871        2403 :                 num_bytes = block_rsv->reserved - block_rsv->size;
    4872        2403 :                 sinfo->bytes_may_use -= num_bytes;
    4873        2403 :                 trace_btrfs_space_reservation(fs_info, "space_info",
    4874             :                                       sinfo->flags, num_bytes, 0);
    4875        2403 :                 block_rsv->reserved = block_rsv->size;
    4876        2403 :                 block_rsv->full = 1;
    4877             :         }
    4878             : 
    4879             :         spin_unlock(&block_rsv->lock);
    4880             :         spin_unlock(&sinfo->lock);
    4881        2406 : }
    4882             : /* Bind the fs-wide block rsvs to their space_infos, attach rsvs to the core tree roots, then size the global rsv. */
    4883         221 : static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
    4884             : {
    4885             :         struct btrfs_space_info *space_info;
    4886             :         /* The chunk rsv is backed by the SYSTEM space_info. */
    4887             :         space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
    4888         221 :         fs_info->chunk_block_rsv.space_info = space_info;
    4889             :         /* Every other rsv set up here is backed by the METADATA space_info. */
    4890             :         space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
    4891         221 :         fs_info->global_block_rsv.space_info = space_info;
    4892         221 :         fs_info->delalloc_block_rsv.space_info = space_info;
    4893         221 :         fs_info->trans_block_rsv.space_info = space_info;
    4894         221 :         fs_info->empty_block_rsv.space_info = space_info;
    4895         221 :         fs_info->delayed_block_rsv.space_info = space_info;
    4896             :         /* extent/csum/dev/tree (and quota, if present) roots use the global rsv; the chunk root uses the chunk rsv. */
    4897         221 :         fs_info->extent_root->block_rsv = &fs_info->global_block_rsv;
    4898         221 :         fs_info->csum_root->block_rsv = &fs_info->global_block_rsv;
    4899         221 :         fs_info->dev_root->block_rsv = &fs_info->global_block_rsv;
    4900         221 :         fs_info->tree_root->block_rsv = &fs_info->global_block_rsv;
    4901         221 :         if (fs_info->quota_root)
    4902           3 :                 fs_info->quota_root->block_rsv = &fs_info->global_block_rsv;
    4903         221 :         fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv;
    4904             : 
    4905         221 :         update_global_block_rsv(fs_info);
    4906         221 : }
    4907             : /* Release the global rsv with num_bytes = (u64)-1, then WARN if any other fs-wide rsv still has size or reserved bytes. */
    4908         221 : static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
    4909             : {
    4910         221 :         block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL,
    4911             :                                 (u64)-1); /* presumably "release everything" - see block_rsv_release_bytes() */
    4912         221 :         WARN_ON(fs_info->delalloc_block_rsv.size > 0);
    4913         221 :         WARN_ON(fs_info->delalloc_block_rsv.reserved > 0);
    4914         221 :         WARN_ON(fs_info->trans_block_rsv.size > 0);
    4915         221 :         WARN_ON(fs_info->trans_block_rsv.reserved > 0);
    4916         221 :         WARN_ON(fs_info->chunk_block_rsv.size > 0);
    4917         221 :         WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
    4918         221 :         WARN_ON(fs_info->delayed_block_rsv.size > 0);
    4919         221 :         WARN_ON(fs_info->delayed_block_rsv.reserved > 0);
    4920         221 : }
    4921             : /* Give the bytes reserved by this transaction handle back to trans->block_rsv and zero trans->bytes_reserved. */
    4922      359896 : void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
    4923             :                                   struct btrfs_root *root)
    4924             : {
    4925      359896 :         if (!trans->block_rsv)
    4926             :                 return; /* handle has no rsv attached */
    4927             : 
    4928      125052 :         if (!trans->bytes_reserved)
    4929             :                 return; /* nothing reserved, nothing to release */
    4930             : 
    4931       51824 :         trace_btrfs_space_reservation(root->fs_info, "transaction",
    4932             :                                       trans->transid, trans->bytes_reserved, 0);
    4933       51824 :         btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved);
    4934       51824 :         trans->bytes_reserved = 0;
    4935             : }
    4936             : 
    4937             : /* Migrate one item's worth of metadata space into root->orphan_block_rsv. Can only return 0 or -ENOSPC. */
    4938        9630 : int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
    4939             :                                   struct inode *inode)
    4940             : {
    4941       19260 :         struct btrfs_root *root = BTRFS_I(inode)->root;
    4942        9630 :         struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root);
    4943        9630 :         struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv;
    4944             : 
    4945             :         /*
    4946             :          * We need to hold space in order to delete our orphan item once we've
    4947             :          * added it, so this takes the reservation so we can release it later
    4948             :          * when we are truly done with the orphan item.
    4949             :          */
    4950             :         u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
    4951        9630 :         trace_btrfs_space_reservation(root->fs_info, "orphan",
    4952             :                                       btrfs_ino(inode), num_bytes, 1); /* traced before the migrate result is known */
    4953        9630 :         return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
    4954             : }
    4955             : /* Release one item's worth of metadata space from root->orphan_block_rsv (same size btrfs_orphan_reserve_metadata() takes). */
    4956        9630 : void btrfs_orphan_release_metadata(struct inode *inode)
    4957             : {
    4958        9630 :         struct btrfs_root *root = BTRFS_I(inode)->root;
    4959             :         u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
    4960        9630 :         trace_btrfs_space_reservation(root->fs_info, "orphan",
    4961             :                                       btrfs_ino(inode), num_bytes, 0);
    4962        9630 :         btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes);
    4963        9630 : }
    4964             : 
    4965             : /*
    4966             :  * btrfs_subvolume_reserve_metadata() - reserve space for subvolume operation
    4967             :  * root: the root of the parent directory
    4968             :  * rsv: block reservation
    4969             :  * items: the number of items that we need to reserve space for
    4970             :  * qgroup_reserved: used to return the reserved size in qgroup
    4971             :  * use_global_rsv: on -ENOSPC, fall back to migrating from the global rsv
    4972             :  *
    4973             :  * Used for snapshot/subvolume creation and deletion. Unlike common
    4974             :  * file/directory operations these change two fs/file trees and the root
    4975             :  * tree, and the number of items the qgroup reserves differs from the free
    4976             :  * space reservation, so the space reservation mechanism in
    4977             :  * start_transaction() can not be used.
    4978             :  */
    4979         456 : int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
    4980             :                                      struct btrfs_block_rsv *rsv,
    4981             :                                      int items,
    4982             :                                      u64 *qgroup_reserved,
    4983             :                                      bool use_global_rsv)
    4984             : {
    4985             :         u64 num_bytes;
    4986             :         int ret;
    4987         228 :         struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
    4988             :         /* With quotas enabled, charge the qgroup first; a failure here returns immediately. */
    4989         228 :         if (root->fs_info->quota_enabled) {
    4990             :                 /* One for parent inode, two for dir entries */
    4991          11 :                 num_bytes = 3 * root->leafsize;
    4992          11 :                 ret = btrfs_qgroup_reserve(root, num_bytes);
    4993          11 :                 if (ret)
    4994             :                         return ret;
    4995             :         } else {
    4996             :                 num_bytes = 0;
    4997             :         }
    4998             : 
    4999         228 :         *qgroup_reserved = num_bytes; /* 0 when quotas are disabled */
    5000             :         /* num_bytes is reused for the metadata reservation; rsv is pointed at the METADATA space_info. */
    5001         228 :         num_bytes = btrfs_calc_trans_metadata_size(root, items);
    5002         456 :         rsv->space_info = __find_space_info(root->fs_info,
    5003             :                                             BTRFS_BLOCK_GROUP_METADATA);
    5004         228 :         ret = btrfs_block_rsv_add(root, rsv, num_bytes,
    5005             :                                   BTRFS_RESERVE_FLUSH_ALL);
    5006             :         /* Optional fallback: take the bytes from the global rsv instead. */
    5007         228 :         if (ret == -ENOSPC && use_global_rsv)
    5008             :                 ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes);
    5009             :         /* On failure undo the qgroup charge made above. */
    5010         228 :         if (ret) {
    5011           0 :                 if (*qgroup_reserved)
    5012           0 :                         btrfs_qgroup_free(root, *qgroup_reserved);
    5013             :         }
    5014             : 
    5015         228 :         return ret;
    5016             : }
    5017             : /* Release rsv with num_bytes = (u64)-1 and free the qgroup bytes, if any were reserved. */
    5018         228 : void btrfs_subvolume_release_metadata(struct btrfs_root *root,
    5019             :                                       struct btrfs_block_rsv *rsv,
    5020             :                                       u64 qgroup_reserved)
    5021             : {
    5022         228 :         btrfs_block_rsv_release(root, rsv, (u64)-1); /* presumably "release everything" - confirm in block_rsv_release_bytes() */
    5023         228 :         if (qgroup_reserved)
    5024          11 :                 btrfs_qgroup_free(root, qgroup_reserved);
    5025         228 : }
    5026             : 
    5027             : /**
    5028             :  * drop_outstanding_extent - drop an outstanding extent
    5029             :  * @inode: the inode we're dropping the extent for
    5030             :  *
    5031             :  * This is called when we are freeing up an outstanding extent, either called
    5032             :  * after an error or after an extent is written.  This will return the number of
    5033             :  * reserved extents that need to be freed.  This must be called with
    5034             :  * BTRFS_I(inode)->lock held.
    5035             :  */
    5036      110632 : static unsigned drop_outstanding_extent(struct inode *inode)
    5037             : {
    5038             :         unsigned drop_inode_space = 0;
    5039             :         unsigned dropped_extents = 0;
    5040             : 
    5041      110632 :         BUG_ON(!BTRFS_I(inode)->outstanding_extents);
    5042      110632 :         BTRFS_I(inode)->outstanding_extents--;
    5043             :         /* Last outstanding extent gone: if the META_RESERVED flag was set, clear it and count one extra item to free. */
    5044      150455 :         if (BTRFS_I(inode)->outstanding_extents == 0 &&
    5045             :             test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
    5046       39822 :                                &BTRFS_I(inode)->runtime_flags))
    5047             :                 drop_inode_space = 1;
    5048             : 
    5049             :         /*
    5050             :          * If we have more or the same amount of outstanding extents than we have
    5051             :          * reserved then we need to leave the reserved extents count alone.
    5052             :          */
    5053      221266 :         if (BTRFS_I(inode)->outstanding_extents >=
    5054      110633 :             BTRFS_I(inode)->reserved_extents)
    5055             :                 return drop_inode_space;
    5056             :         /* Otherwise trim reserved_extents down to outstanding_extents and report the difference. */
    5057       58102 :         dropped_extents = BTRFS_I(inode)->reserved_extents -
    5058             :                 BTRFS_I(inode)->outstanding_extents;
    5059       58102 :         BTRFS_I(inode)->reserved_extents -= dropped_extents;
    5060       58102 :         return dropped_extents + drop_inode_space;
    5061             : }
    5062             : 
    5063             : /**
    5064             :  * calc_csum_metadata_size - return the amount of metadata space that must be
    5065             :  *      reserved/free'd for the given bytes.
    5066             :  * @inode: the inode we're manipulating
    5067             :  * @num_bytes: the number of bytes in question
    5068             :  * @reserve: 1 if we are reserving space, 0 if we are freeing space
    5069             :  *
    5070             :  * This adjusts the number of csum_bytes in the inode and then returns the
    5071             :  * correct amount of metadata that must either be reserved or freed.  We
    5072             :  * calculate how many checksums we can fit into one leaf and then divide the
    5073             :  * number of bytes that will need to be checksummed by this value to figure out
    5074             :  * how many checksums will be required.  If we are adding bytes then the number
    5075             :  * may go up and we will return the number of additional bytes that must be
    5076             :  * reserved.  If it is going down we will return the number of bytes that must
    5077             :  * be freed.
    5078             :  *
    5079             :  * This must be called with BTRFS_I(inode)->lock held.
    5080             :  */
    5081      239712 : static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes,
    5082             :                                    int reserve)
    5083             : {
    5084      319777 :         struct btrfs_root *root = BTRFS_I(inode)->root;
    5085             :         u64 csum_size;
    5086             :         int num_csums_per_leaf;
    5087             :         int num_csums;
    5088             :         int old_csums;
    5089             :         /* NODATASUM inode with no csum_bytes accounted: nothing to reserve or free (csum_bytes is left untouched). */
    5090      239712 :         if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM &&
    5091           0 :             BTRFS_I(inode)->csum_bytes == 0)
    5092             :                 return 0;
    5093             :         /* Checksum count (one per sectorsize bytes) before csum_bytes is adjusted. */
    5094      479428 :         old_csums = (int)div64_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize);
    5095      239714 :         if (reserve)
    5096      154342 :                 BTRFS_I(inode)->csum_bytes += num_bytes;
    5097             :         else
    5098       85372 :                 BTRFS_I(inode)->csum_bytes -= num_bytes;
    5099      239714 :         csum_size = BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item);
    5100      239714 :         num_csums_per_leaf = (int)div64_u64(csum_size,
    5101             :                                             sizeof(struct btrfs_csum_item) +
    5102             :                                             sizeof(struct btrfs_disk_key));
    5103      479428 :         num_csums = (int)div64_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize);
    5104      239714 :         num_csums = num_csums + num_csums_per_leaf - 1; /* round up to whole leaves */
    5105      239714 :         num_csums = num_csums / num_csums_per_leaf;
    5106             : 
    5107      239714 :         old_csums = old_csums + num_csums_per_leaf - 1; /* same rounding for the old count */
    5108      239714 :         old_csums = old_csums / num_csums_per_leaf;
    5109             : 
    5110             :         /* No change, no need to reserve more */
    5111      239714 :         if (old_csums == num_csums)
    5112             :                 return 0;
    5113             :         /* Return the metadata size for the change in leaf count, in the requested direction. */
    5114       80065 :         if (reserve)
    5115       80408 :                 return btrfs_calc_trans_metadata_size(root,
    5116       40204 :                                                       num_csums - old_csums);
    5117             : 
    5118       79722 :         return btrfs_calc_trans_metadata_size(root, old_csums - num_csums);
    5119             : }
    5120             : /* Reserve metadata space for num_bytes of delalloc on inode; 0 on success, else the btrfs_qgroup_reserve()/reserve_metadata_bytes() error. */
    5121      154344 : int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
    5122             : {
    5123      308689 :         struct btrfs_root *root = BTRFS_I(inode)->root;
    5124      154344 :         struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
    5125             :         u64 to_reserve = 0;
    5126             :         u64 csum_bytes;
    5127             :         unsigned nr_extents = 0;
    5128             :         int extra_reserve = 0;
    5129             :         enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
    5130             :         int ret = 0;
    5131             :         bool delalloc_lock = true;
    5132             :         u64 to_free = 0;
    5133             :         unsigned dropped;
    5134             : 
    5135             :         /* If we are a free space inode we need to not flush since we will be in
    5136             :          * the middle of a transaction commit.  We also don't need the delalloc
    5137             :          * mutex since we won't race with anybody.  We need this mostly to keep
    5138             :          * lockdep quiet.
    5139             :          */
    5140      154344 :         if (btrfs_is_free_space_inode(inode)) {
    5141             :                 flush = BTRFS_RESERVE_NO_FLUSH;
    5142             :                 delalloc_lock = false;
    5143             :         }
    5144             : 
    5145      308689 :         if (flush != BTRFS_RESERVE_NO_FLUSH &&
    5146      154344 :             btrfs_transaction_in_commit(root->fs_info))
    5147        1026 :                 schedule_timeout(1); /* NOTE(review): no task state is set before this call here - confirm it actually sleeps */
    5148             : 
    5149      154345 :         if (delalloc_lock)
    5150      154345 :                 mutex_lock(&BTRFS_I(inode)->delalloc_mutex);
    5151             : 
    5152      154343 :         num_bytes = ALIGN(num_bytes, root->sectorsize);
    5153             : 
    5154             :         spin_lock(&BTRFS_I(inode)->lock);
    5155      154344 :         BTRFS_I(inode)->outstanding_extents++;
    5156             :         /* Only extents not already covered by reserved_extents need new space. */
    5157      154344 :         if (BTRFS_I(inode)->outstanding_extents >
    5158      154344 :             BTRFS_I(inode)->reserved_extents)
    5159       63004 :                 nr_extents = BTRFS_I(inode)->outstanding_extents -
    5160             :                         BTRFS_I(inode)->reserved_extents;
    5161             : 
    5162             :         /*
    5163             :          * Add an item to reserve for updating the inode when we complete the
    5164             :          * delalloc io.
    5165             :          */
    5166      154344 :         if (!test_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
    5167             :                       &BTRFS_I(inode)->runtime_flags)) {
    5168       39865 :                 nr_extents++;
    5169             :                 extra_reserve = 1;
    5170             :         }
    5171             : 
    5172             :         to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
    5173      154344 :         to_reserve += calc_csum_metadata_size(inode, num_bytes, 1);
    5174      154340 :         csum_bytes = BTRFS_I(inode)->csum_bytes; /* snapshot, compared in out_fail to detect concurrent frees */
    5175             :         spin_unlock(&BTRFS_I(inode)->lock);
    5176             :         /* Quota charge: the data bytes plus one leafsize per extent item being reserved. */
    5177      154344 :         if (root->fs_info->quota_enabled) {
    5178        4790 :                 ret = btrfs_qgroup_reserve(root, num_bytes +
    5179        4790 :                                            nr_extents * root->leafsize);
    5180        4791 :                 if (ret)
    5181             :                         goto out_fail;
    5182             :         }
    5183             : 
    5184      154333 :         ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
    5185      154330 :         if (unlikely(ret)) {
    5186           0 :                 if (root->fs_info->quota_enabled)
    5187           0 :                         btrfs_qgroup_free(root, num_bytes +
    5188           0 :                                                 nr_extents * root->leafsize);
    5189             :                 goto out_fail;
    5190             :         }
    5191             :         /* Reservation succeeded: record it on the inode. */
    5192             :         spin_lock(&BTRFS_I(inode)->lock);
    5193      154331 :         if (extra_reserve) {
    5194             :                 set_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
    5195             :                         &BTRFS_I(inode)->runtime_flags);
    5196       39857 :                 nr_extents--; /* the extra inode item is tracked by the flag, not in reserved_extents */
    5197             :         }
    5198      154331 :         BTRFS_I(inode)->reserved_extents += nr_extents;
    5199             :         spin_unlock(&BTRFS_I(inode)->lock);
    5200             : 
    5201      154332 :         if (delalloc_lock)
    5202      154331 :                 mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
    5203             : 
    5204      154332 :         if (to_reserve)
    5205       63370 :                 trace_btrfs_space_reservation(root->fs_info, "delalloc",
    5206             :                                               btrfs_ino(inode), to_reserve, 1);
    5207      154332 :         block_rsv_add_bytes(block_rsv, to_reserve, 1);
    5208             : 
    5209      154333 :         return 0;
    5210             :         /* Failure path: undo the outstanding_extents/csum_bytes accounting done above. */
    5211             : out_fail:
    5212             :         spin_lock(&BTRFS_I(inode)->lock);
    5213          12 :         dropped = drop_outstanding_extent(inode);
    5214             :         /*
    5215             :          * If the inodes csum_bytes is the same as the original
    5216             :          * csum_bytes then we know we haven't raced with any free()ers
    5217             :          * so we can just reduce our inodes csum bytes and carry on.
    5218             :          */
    5219          12 :         if (BTRFS_I(inode)->csum_bytes == csum_bytes) {
    5220          12 :                 calc_csum_metadata_size(inode, num_bytes, 0);
    5221             :         } else {
    5222             :                 u64 orig_csum_bytes = BTRFS_I(inode)->csum_bytes;
    5223             :                 u64 bytes;
    5224             : 
    5225             :                 /*
    5226             :                  * This is tricky, but first we need to figure out how much we
    5227             :                  * free'd from any free-ers that occurred during this
    5228             :                  * reservation, so we reset ->csum_bytes to the csum_bytes
    5229             :                  * before we dropped our lock, and then call the free for the
    5230             :                  * number of bytes that were freed while we were trying our
    5231             :                  * reservation.
    5232             :                  */
    5233           0 :                 bytes = csum_bytes - BTRFS_I(inode)->csum_bytes;
    5234           0 :                 BTRFS_I(inode)->csum_bytes = csum_bytes;
    5235           0 :                 to_free = calc_csum_metadata_size(inode, bytes, 0);
    5236             : 
    5237             : 
    5238             :                 /*
    5239             :                  * Now we need to see how much we would have freed had we not
    5240             :                  * been making this reservation and our ->csum_bytes were not
    5241             :                  * artificially inflated.
    5242             :                  */
    5243           0 :                 BTRFS_I(inode)->csum_bytes = csum_bytes - num_bytes;
    5244             :                 bytes = csum_bytes - orig_csum_bytes;
    5245           0 :                 bytes = calc_csum_metadata_size(inode, bytes, 0);
    5246             : 
    5247             :                 /*
    5248             :                  * Now reset ->csum_bytes to what it should be.  If bytes is
    5249             :                  * more than to_free then we would have free'd more space had we
    5250             :                  * not had an artificially high ->csum_bytes, so we need to free
    5251             :                  * the remainder.  If bytes is the same or less then we don't
    5252             :                  * need to do anything, the other free-ers did the correct
    5253             :                  * thing.
    5254             :                  */
    5255           0 :                 BTRFS_I(inode)->csum_bytes = orig_csum_bytes - num_bytes;
    5256           0 :                 if (bytes > to_free)
    5257           0 :                         to_free = bytes - to_free;
    5258             :                 else
    5259             :                         to_free = 0;
    5260             :         }
    5261             :         spin_unlock(&BTRFS_I(inode)->lock);
    5262          12 :         if (dropped)
    5263           1 :                 to_free += btrfs_calc_trans_metadata_size(root, dropped);
    5264             : 
    5265          12 :         if (to_free) {
    5266           1 :                 btrfs_block_rsv_release(root, block_rsv, to_free);
    5267           1 :                 trace_btrfs_space_reservation(root->fs_info, "delalloc",
    5268             :                                               btrfs_ino(inode), to_free, 0);
    5269             :         }
    5270          12 :         if (delalloc_lock)
    5271          12 :                 mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
    5272          12 :         return ret;
    5273             : }
    5274             : 
    5275             : /**
    5276             :  * btrfs_delalloc_release_metadata - release a metadata reservation for an inode
    5277             :  * @inode: the inode to release the reservation for
    5278             :  * @num_bytes: the number of bytes we're releasing
    5279             :  *
    5280             :  * This will release the metadata reservation for an inode.  This can be called
    5281             :  * once we complete IO for a given set of bytes to release their metadata
    5282             :  * reservations.
    5283             :  */
    5284      110613 : void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
    5285             : {
    5286      168712 :         struct btrfs_root *root = BTRFS_I(inode)->root;
    5287             :         u64 to_free = 0;
    5288             :         unsigned dropped;
    5289             : 
    5290      110613 :         num_bytes = ALIGN(num_bytes, root->sectorsize);
    5291             :         spin_lock(&BTRFS_I(inode)->lock);
    5292      110621 :         dropped = drop_outstanding_extent(inode);
    5293             :         /* Csum accounting is only adjusted when some bytes are actually being released. */
    5294      110618 :         if (num_bytes)
    5295       85360 :                 to_free = calc_csum_metadata_size(inode, num_bytes, 0);
    5296             :         spin_unlock(&BTRFS_I(inode)->lock);
    5297      110616 :         if (dropped > 0)
    5298       58099 :                 to_free += btrfs_calc_trans_metadata_size(root, dropped);
    5299             : 
    5300      110616 :         trace_btrfs_space_reservation(root->fs_info, "delalloc",
    5301             :                                       btrfs_ino(inode), to_free, 0);
    5302      110615 :         if (root->fs_info->quota_enabled) {
    5303        8271 :                 btrfs_qgroup_free(root, num_bytes +
    5304        8271 :                                         dropped * root->leafsize);
    5305             :         }
    5306             :         /* Give the computed metadata bytes back to the fs-wide delalloc rsv. */
    5307      110617 :         btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
    5308             :                                 to_free);
    5309      110620 : }
    5310             : 
    5311             : /**
    5312             :  * btrfs_delalloc_reserve_space - reserve data and metadata space for delalloc
    5313             :  * @inode: inode we're writing to
    5314             :  * @num_bytes: the number of bytes we want to allocate
    5315             :  *
    5316             :  * This will do the following things
    5317             :  *
    5318             :  * o reserve space in the data space info for num_bytes
    5319             :  * o reserve space in the metadata space info based on number of outstanding
    5320             :  *   extents and how much csums will be needed
    5321             :  * o add to the inode's ->delalloc_bytes
    5322             :  * o add it to the fs_info's delalloc inodes list.
    5323             :  *
    5324             :  * This will return 0 for success and -ENOSPC if there is no space left.
                        * Any nonzero value from btrfs_check_data_free_space() or
                        * btrfs_delalloc_reserve_metadata() is passed back to the caller unchanged.
    5325             :  */
    5326       35037 : int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes)
    5327             : {
    5328             :         int ret;
    5329             : 
                               /* step 1: data reservation; nothing to undo if this fails */
    5330       35037 :         ret = btrfs_check_data_free_space(inode, num_bytes);
    5331       35039 :         if (ret)
    5332             :                 return ret;
    5333             : 
                               /* step 2: metadata reservation; on failure give the data space back */
    5334       35039 :         ret = btrfs_delalloc_reserve_metadata(inode, num_bytes);
    5335       35039 :         if (ret) {
    5336          10 :                 btrfs_free_reserved_data_space(inode, num_bytes);
    5337          10 :                 return ret;
    5338             :         }
    5339             : 
    5340             :         return 0;
    5341             : }
    5342             : 
    5343             : /**
    5344             :  * btrfs_delalloc_release_space - release data and metadata space for delalloc
    5345             :  * @inode: inode we're releasing space for
    5346             :  * @num_bytes: the number of bytes we want to free up
    5347             :  *
    5348             :  * This must be matched with a call to btrfs_delalloc_reserve_space.  This is
    5349             :  * called in the case that we don't need the metadata AND data reservations
    5350             :  * anymore, i.e. if there is an error or we insert an inline extent.
    5351             :  *
    5352             :  * This function will release the metadata space that was not used and will
    5353             :  * decrement ->delalloc_bytes and remove it from the fs_info delalloc_inodes
    5354             :  * list if there are no delalloc bytes left.
    5355             :  */
    5356           0 : void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes)
    5357             : {
                               /* metadata first, then data, both for the same num_bytes */
    5358           0 :         btrfs_delalloc_release_metadata(inode, num_bytes);
    5359           0 :         btrfs_free_reserved_data_space(inode, num_bytes);
    5360           0 : }
    5361             : 
                       /*
                        * update_block_group - account a byte range as allocated or freed
                        * @root:      root whose fs_info holds the super block copy and block groups
                        * @bytenr:    first byte of the range
                        * @num_bytes: length of the range
                        * @alloc:     nonzero to account an allocation, zero to account a free
                        *
                        * Adjusts bytes_used in the super block copy, then walks the range one
                        * block group at a time and updates the group's used counter together with
                        * its space_info counters.  On alloc the bytes move from reserved to used.
                        * On free they move from used to pinned and the range is marked dirty in
                        * info->pinned_extents.
                        *
                        * Returns 0, or -ENOENT when no block group is found for the current
                        * bytenr.  Updates made before that point (super block copy, earlier block
                        * groups) are not undone.
                        */
    5362      170354 : static int update_block_group(struct btrfs_root *root,
    5363             :                               u64 bytenr, u64 num_bytes, int alloc)
    5364             : {
    5365             :         struct btrfs_block_group_cache *cache = NULL;
    5366      170354 :         struct btrfs_fs_info *info = root->fs_info;
    5367             :         u64 total = num_bytes;
    5368             :         u64 old_val;
    5369             :         u64 byte_in_group;
    5370             :         int factor;
    5371             : 
    5372             :         /* block accounting for super block */
    5373             :         spin_lock(&info->delalloc_root_lock);
    5374      170356 :         old_val = btrfs_super_bytes_used(info->super_copy);
    5375      170356 :         if (alloc)
    5376      106085 :                 old_val += num_bytes;
    5377             :         else
    5378       64271 :                 old_val -= num_bytes;
    5379             :         btrfs_set_super_bytes_used(info->super_copy, old_val);
    5380             :         spin_unlock(&info->delalloc_root_lock);
    5381             : 
                               /* from here on num_bytes is reused as the per-group chunk size */
    5382      340709 :         while (total) {
    5383             :                 cache = btrfs_lookup_block_group(info, bytenr);
    5384      170356 :                 if (!cache)
    5385             :                         return -ENOENT;
                                       /* these profiles are counted twice in disk_used below */
    5386      170356 :                 if (cache->flags & (BTRFS_BLOCK_GROUP_DUP |
    5387             :                                     BTRFS_BLOCK_GROUP_RAID1 |
    5388             :                                     BTRFS_BLOCK_GROUP_RAID10))
    5389             :                         factor = 2;
    5390             :                 else
    5391             :                         factor = 1;
    5392             :                 /*
    5393             :                  * If this block group has free space cache written out, we
    5394             :                  * need to make sure to load it if we are removing space.  This
    5395             :                  * is because we need the unpinning stage to actually add the
    5396             :                  * space back to the block group, otherwise we will leak space.
    5397             :                  */
    5398      170356 :                 if (!alloc && cache->cached == BTRFS_CACHE_NO)
    5399           5 :                         cache_block_group(cache, 1);
    5400             : 
    5401      170356 :                 byte_in_group = bytenr - cache->key.objectid;
    5402      170356 :                 WARN_ON(byte_in_group > cache->key.offset);
    5403             : 
                                       /* space_info->lock is taken first, cache->lock second */
    5404      170356 :                 spin_lock(&cache->space_info->lock);
    5405             :                 spin_lock(&cache->lock);
    5406             : 
    5407      340712 :                 if (btrfs_test_opt(root, SPACE_CACHE) &&
    5408      170356 :                     cache->disk_cache_state < BTRFS_DC_CLEAR)
    5409        5387 :                         cache->disk_cache_state = BTRFS_DC_CLEAR;
    5410             : 
    5411      170356 :                 cache->dirty = 1;
    5412             :                 old_val = btrfs_block_group_used(&cache->item);
                                       /* clamp to what is left of this block group */
    5413      170356 :                 num_bytes = min(total, cache->key.offset - byte_in_group);
    5414      170356 :                 if (alloc) {
    5415      106085 :                         old_val += num_bytes;
    5416             :                         btrfs_set_block_group_used(&cache->item, old_val);
    5417      106085 :                         cache->reserved -= num_bytes;
    5418      106085 :                         cache->space_info->bytes_reserved -= num_bytes;
    5419      106085 :                         cache->space_info->bytes_used += num_bytes;
    5420      106085 :                         cache->space_info->disk_used += num_bytes * factor;
    5421             :                         spin_unlock(&cache->lock);
    5422      106085 :                         spin_unlock(&cache->space_info->lock);
    5423             :                 } else {
    5424       64271 :                         old_val -= num_bytes;
    5425             :                         btrfs_set_block_group_used(&cache->item, old_val);
    5426       64271 :                         cache->pinned += num_bytes;
    5427       64271 :                         cache->space_info->bytes_pinned += num_bytes;
    5428       64271 :                         cache->space_info->bytes_used -= num_bytes;
    5429       64271 :                         cache->space_info->disk_used -= num_bytes * factor;
    5430             :                         spin_unlock(&cache->lock);
    5431       64271 :                         spin_unlock(&cache->space_info->lock);
    5432             : 
                                               /* done after both locks are released; result not checked */
    5433       64271 :                         set_extent_dirty(info->pinned_extents,
    5434       64271 :                                          bytenr, bytenr + num_bytes - 1,
    5435             :                                          GFP_NOFS | __GFP_NOFAIL);
    5436             :                 }
    5437      170356 :                 btrfs_put_block_group(cache);
    5438      170354 :                 total -= num_bytes;
    5439      170354 :                 bytenr += num_bytes;
    5440             :         }
    5441             :         return 0;
    5442             : }
    5443             : 
                       /*
                        * first_logical_byte - return the cached first logical byte, or look one up
                        * @root:         root used to reach fs_info
                        * @search_start: passed to btrfs_lookup_first_block_group() on the slow path
                        *
                        * Returns fs_info->first_logical_byte when it holds anything other than
                        * (u64)-1.  Otherwise returns key.objectid of the block group found for
                        * @search_start, or 0 when no block group is found.
                        */
    5444      112258 : static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
    5445             : {
    5446             :         struct btrfs_block_group_cache *cache;
    5447             :         u64 bytenr;
    5448             : 
                               /* the cached value is read under block_group_cache_lock */
    5449      112258 :         spin_lock(&root->fs_info->block_group_cache_lock);
    5450      112265 :         bytenr = root->fs_info->first_logical_byte;
    5451             :         spin_unlock(&root->fs_info->block_group_cache_lock);
    5452             : 
                               /* (u64)-1 is presumably the "not set" marker; confirm where it is reset */
    5453      112265 :         if (bytenr < (u64)-1)
    5454             :                 return bytenr;
    5455             : 
    5456          22 :         cache = btrfs_lookup_first_block_group(root->fs_info, search_start);
    5457          22 :         if (!cache)
    5458             :                 return 0;
    5459             : 
                               /* copy the start out before dropping the block group reference */
    5460          22 :         bytenr = cache->key.objectid;
    5461          22 :         btrfs_put_block_group(cache);
    5462             : 
    5463             :         return bytenr;
    5464             : }
    5465             : 
                       /*
                        * pin_down_extent - add a byte range to a block group's pinned accounting
                        * @root:      root whose fs_info->pinned_extents tree records the range
                        * @cache:     block group whose counters are updated
                        * @bytenr:    first byte of the range
                        * @num_bytes: length of the range
                        * @reserved:  nonzero if the bytes are currently counted as reserved; they
                        *             are then also subtracted from the reserved counters and a
                        *             reserved-extent-free trace event is emitted
                        *
                        * Always returns 0.
                        */
    5466        4969 : static int pin_down_extent(struct btrfs_root *root,
    5467             :                            struct btrfs_block_group_cache *cache,
    5468             :                            u64 bytenr, u64 num_bytes, int reserved)
    5469             : {
                               /* space_info->lock is taken first, cache->lock second */
    5470        4969 :         spin_lock(&cache->space_info->lock);
    5471             :         spin_lock(&cache->lock);
    5472        4969 :         cache->pinned += num_bytes;
    5473        4969 :         cache->space_info->bytes_pinned += num_bytes;
    5474        4969 :         if (reserved) {
    5475        4969 :                 cache->reserved -= num_bytes;
    5476        4969 :                 cache->space_info->bytes_reserved -= num_bytes;
    5477             :         }
    5478             :         spin_unlock(&cache->lock);
    5479        4969 :         spin_unlock(&cache->space_info->lock);
    5480             : 
                               /* range end is inclusive; the return value is not checked */
    5481        4969 :         set_extent_dirty(root->fs_info->pinned_extents, bytenr,
    5482        4969 :                          bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
    5483        4969 :         if (reserved)
    5484        4969 :                 trace_btrfs_reserved_extent_free(root, bytenr, num_bytes);
    5485        4969 :         return 0;
    5486             : }
    5487             : 
    5488             : /*
                        * btrfs_pin_extent - look up the block group for bytenr and pin the range
                        *
    5489             :  * this function must be called within a transaction
                        *
                        * A missing block group hits BUG_ON.  The arguments are forwarded to
                        * pin_down_extent() unchanged.  Always returns 0.
    5490             :  */
    5491         889 : int btrfs_pin_extent(struct btrfs_root *root,
    5492             :                      u64 bytenr, u64 num_bytes, int reserved)
    5493             : {
    5494             :         struct btrfs_block_group_cache *cache;
    5495             : 
    5496         889 :         cache = btrfs_lookup_block_group(root->fs_info, bytenr);
    5497         889 :         BUG_ON(!cache); /* Logic error */
    5498             : 
    5499         889 :         pin_down_extent(root, cache, bytenr, num_bytes, reserved);
    5500             : 
                               /* drop the reference obtained by the lookup above */
    5501         889 :         btrfs_put_block_group(cache);
    5502         889 :         return 0;
    5503             : }
    5504             : 
    5505             : /*
                        * btrfs_pin_extent_for_log_replay - pin a range and drop it from free space
                        *
    5506             :  * this function must be called within a transaction
                        *
                        * Returns -EINVAL when no block group is found for bytenr, otherwise the
                        * return value of btrfs_remove_free_space().
    5507             :  */
    5508           0 : int btrfs_pin_extent_for_log_replay(struct btrfs_root *root,
    5509             :                                     u64 bytenr, u64 num_bytes)
    5510             : {
    5511             :         struct btrfs_block_group_cache *cache;
    5512             :         int ret;
    5513             : 
    5514           0 :         cache = btrfs_lookup_block_group(root->fs_info, bytenr);
    5515           0 :         if (!cache)
    5516             :                 return -EINVAL;
    5517             : 
    5518             :         /*
    5519             :          * pull in the free space cache (if any) so that our pin
    5520             :          * removes the free space from the cache.  We have load_only set
    5521             :          * to one because the slow code to read in the free extents does check
    5522             :          * the pinned extents.
    5523             :          */
    5524           0 :         cache_block_group(cache, 1);
    5525             : 
                               /* reserved == 0: the reserved counters are left untouched */
    5526           0 :         pin_down_extent(root, cache, bytenr, num_bytes, 0);
    5527             : 
    5528             :         /* remove us from the free space cache (if we're there at all) */
    5529           0 :         ret = btrfs_remove_free_space(cache, bytenr, num_bytes);
    5530           0 :         btrfs_put_block_group(cache);
    5531           0 :         return ret;
    5532             : }
    5533             : 
                       /*
                        * __exclude_logged_extent - keep a logged byte range out of the free space
                        * @root:      root used to reach fs_info and passed to add_excluded_extent()
                        * @start:     first byte of the range
                        * @num_bytes: length of the range
                        *
                        * Looks up the block group containing @start and calls
                        * cache_block_group(block_group, 0) on it.  If the group has no caching
                        * control it must already be fully cached (BUG_ON otherwise) and the range
                        * is removed from its free space.  Otherwise, under caching_ctl->mutex, the
                        * range is compared with caching_ctl->progress:
                        *
                        *  - entirely at or above progress: add_excluded_extent()
                        *  - entirely below progress:       btrfs_remove_free_space()
                        *  - straddling progress:           the lower part is removed from free
                        *                                   space, the upper part is meant to be
                        *                                   added as an excluded extent
                        *
                        * Returns -EINVAL when no block group is found, otherwise the result of the
                        * last helper called.
                        */
    5534           0 : static int __exclude_logged_extent(struct btrfs_root *root, u64 start, u64 num_bytes)
    5535             : {
    5536             :         int ret;
    5537             :         struct btrfs_block_group_cache *block_group;
    5538             :         struct btrfs_caching_control *caching_ctl;
    5539             : 
    5540           0 :         block_group = btrfs_lookup_block_group(root->fs_info, start);
    5541           0 :         if (!block_group)
    5542             :                 return -EINVAL;
    5543             : 
    5544           0 :         cache_block_group(block_group, 0);
    5545           0 :         caching_ctl = get_caching_control(block_group);
    5546             : 
    5547           0 :         if (!caching_ctl) {
    5548             :                 /* Logic error */
    5549           0 :                 BUG_ON(!block_group_cache_done(block_group));
    5550           0 :                 ret = btrfs_remove_free_space(block_group, start, num_bytes);
    5551             :         } else {
    5552           0 :                 mutex_lock(&caching_ctl->mutex);
    5553             : 
    5554           0 :                 if (start >= caching_ctl->progress) {
    5555           0 :                         ret = add_excluded_extent(root, start, num_bytes);
    5556           0 :                 } else if (start + num_bytes <= caching_ctl->progress) {
    5557           0 :                         ret = btrfs_remove_free_space(block_group,
    5558             :                                                       start, num_bytes);
    5559             :                 } else {
                                               /* lower part: [start, progress) */
    5560           0 :                         num_bytes = caching_ctl->progress - start;
    5561           0 :                         ret = btrfs_remove_free_space(block_group,
    5562             :                                                       start, num_bytes);
    5563           0 :                         if (ret)
    5564             :                                 goto out_lock;
    5565             : 
                                               /*
                                                * NOTE(review): num_bytes was overwritten with
                                                * (progress - start) above, so the expression below
                                                * is start + (progress - start) - progress == 0 and
                                                * the upper part is excluded with a length of 0.
                                                * It looks like the original end of the range should
                                                * have been saved before the first assignment;
                                                * confirm and fix in the real source.
                                                */
    5566           0 :                         num_bytes = (start + num_bytes) -
    5567           0 :                                 caching_ctl->progress;
    5568             :                         start = caching_ctl->progress;
    5569           0 :                         ret = add_excluded_extent(root, start, num_bytes);
    5570             :                 }
    5571             : out_lock:
    5572           0 :                 mutex_unlock(&caching_ctl->mutex);
    5573           0 :                 put_caching_control(caching_ctl);
    5574             :         }
    5575           0 :         btrfs_put_block_group(block_group);
    5576           0 :         return ret;
    5577             : }
    5578             : 
                       /*
                        * btrfs_exclude_logged_extents - exclude the data extents referenced by a leaf
                        * @log: root used to reach fs_info and passed to __exclude_logged_extent()
                        * @eb:  extent buffer whose items are scanned
                        *
                        * Does nothing unless the MIXED_GROUPS incompat flag is set.  Otherwise
                        * every BTRFS_EXTENT_DATA_KEY item that is not inline and has a non-zero
                        * disk bytenr is handed to __exclude_logged_extent() with its disk bytenr
                        * and disk byte count.
                        *
                        * Always returns 0.
                        */
    5579           0 : int btrfs_exclude_logged_extents(struct btrfs_root *log,
    5580           0 :                                  struct extent_buffer *eb)
    5581             : {
    5582             :         struct btrfs_file_extent_item *item;
    5583             :         struct btrfs_key key;
    5584             :         int found_type;
    5585             :         int i;
    5586             : 
    5587           0 :         if (!btrfs_fs_incompat(log->fs_info, MIXED_GROUPS))
    5588             :                 return 0;
    5589             : 
    5590           0 :         for (i = 0; i < btrfs_header_nritems(eb); i++) {
    5591           0 :                 btrfs_item_key_to_cpu(eb, &key, i);
    5592           0 :                 if (key.type != BTRFS_EXTENT_DATA_KEY)
    5593           0 :                         continue;
    5594           0 :                 item = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
    5595             :                 found_type = btrfs_file_extent_type(eb, item);
    5596           0 :                 if (found_type == BTRFS_FILE_EXTENT_INLINE)
    5597           0 :                         continue;
    5598           0 :                 if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
    5599           0 :                         continue;
                                       /* key is reused as scratch space for (bytenr, length) */
    5600           0 :                 key.objectid = btrfs_file_extent_disk_bytenr(eb, item);
    5601           0 :                 key.offset = btrfs_file_extent_disk_num_bytes(eb, item);
                                       /*
                                        * NOTE(review): the return value is discarded, so a
                                        * failure here is invisible to the caller.  Confirm
                                        * whether errors should stop the loop and be returned.
                                        */
    5602           0 :                 __exclude_logged_extent(log, key.objectid, key.offset);
    5603             :         }
    5604             : 
    5605             :         return 0;
    5606             : }
    5607             : 
    5608             : /**
    5609             :  * btrfs_update_reserved_bytes - update the block_group and space info counters
    5610             :  * @cache:      The cache we are manipulating
    5611             :  * @num_bytes:  The number of bytes in question
    5612             :  * @reserve:    One of the reservation enums
    5613             :  * @delalloc:   The blocks are allocated for the delalloc write
    5614             :  *
    5615             :  * This is called by the allocator when it reserves space, or by somebody who is
    5616             :  * freeing space that was never actually used on disk.  For example if you
    5617             :  * reserve some space for a new leaf in transaction A and before transaction A
    5618             :  * commits you free that leaf, you call this with reserve set to RESERVE_FREE
    5619             :  * in order to clear the reservation.
    5620             :  *
    5621             :  * Metadata reservations should be called with RESERVE_ALLOC so we do the proper
    5622             :  * ENOSPC accounting.  For data we handle the reservation through clearing the
    5623             :  * delalloc bits in the io_tree.  We have to do this since we could end up
    5624             :  * allocating less disk space for the amount of data we have reserved in the
    5625             :  * case of compression.
    5626             :  *
    5627             :  * If this is a reservation and the block group has become read only we cannot
    5628             :  * make the reservation and return -EAGAIN, otherwise this function always
    5629             :  * succeeds.
    5630             :  */
    5631      113443 : static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
    5632             :                                        u64 num_bytes, int reserve, int delalloc)
    5633             : {
    5634      113443 :         struct btrfs_space_info *space_info = cache->space_info;
    5635             :         int ret = 0;
    5636             : 
                               /* space_info->lock is taken first, cache->lock second */
    5637             :         spin_lock(&space_info->lock);
    5638             :         spin_lock(&cache->lock);
    5639      113446 :         if (reserve != RESERVE_FREE) {
                                       /* reserving: refused for a read-only block group */
    5640      112266 :                 if (cache->ro) {
    5641             :                         ret = -EAGAIN;
    5642             :                 } else {
    5643      112266 :                         cache->reserved += num_bytes;
    5644      112266 :                         space_info->bytes_reserved += num_bytes;
                                               /* only RESERVE_ALLOC also takes the bytes out of bytes_may_use */
    5645      112266 :                         if (reserve == RESERVE_ALLOC) {
    5646       58886 :                                 trace_btrfs_space_reservation(cache->fs_info,
    5647             :                                                 "space_info", space_info->flags,
    5648             :                                                 num_bytes, 0);
    5649       58886 :                                 space_info->bytes_may_use -= num_bytes;
    5650             :                         }
    5651             : 
    5652      112266 :                         if (delalloc)
    5653       45798 :                                 cache->delalloc_bytes += num_bytes;
    5654             :                 }
    5655             :         } else {
                                       /* freeing: in a read-only group the bytes are added to bytes_readonly */
    5656        1180 :                 if (cache->ro)
    5657           0 :                         space_info->bytes_readonly += num_bytes;
    5658        1180 :                 cache->reserved -= num_bytes;
    5659        1180 :                 space_info->bytes_reserved -= num_bytes;
    5660             : 
    5661        1180 :                 if (delalloc)
    5662           0 :                         cache->delalloc_bytes -= num_bytes;
    5663             :         }
    5664             :         spin_unlock(&cache->lock);
    5665             :         spin_unlock(&space_info->lock);
    5666      113444 :         return ret;
    5667             : }
    5668             : 
                       /*
                        * btrfs_prepare_extent_commit - set unpin limits and switch the pinned tree
                        * @trans: not used in this function
                        * @root:  root used to reach fs_info
                        *
                        * With commit_root_sem held for writing, every caching control on
                        * fs_info->caching_block_groups gets its block group's last_byte_to_unpin
                        * set: (u64)-1 when caching is done (the control is then unlinked and
                        * put), otherwise the current caching progress.  fs_info->pinned_extents
                        * is then switched to the other of the two freed_extents trees.  After the
                        * semaphore is released update_global_block_rsv() is called.
                        */
    5669        2098 : void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
    5670             :                                 struct btrfs_root *root)
    5671             : {
    5672        2098 :         struct btrfs_fs_info *fs_info = root->fs_info;
    5673             :         struct btrfs_caching_control *next;
    5674             :         struct btrfs_caching_control *caching_ctl;
    5675             :         struct btrfs_block_group_cache *cache;
    5676             : 
    5677        2098 :         down_write(&fs_info->commit_root_sem);
    5678             : 
                               /* _safe variant: entries are removed from the list while iterating */
    5679        2304 :         list_for_each_entry_safe(caching_ctl, next,
    5680             :                                  &fs_info->caching_block_groups, list) {
    5681         206 :                 cache = caching_ctl->block_group;
    5682         206 :                 if (block_group_cache_done(cache)) {
                                               /* every start below (u64)-1 passes unpin_extent_range()'s check */
    5683         206 :                         cache->last_byte_to_unpin = (u64)-1;
    5684             :                         list_del_init(&caching_ctl->list);
    5685         206 :                         put_caching_control(caching_ctl);
    5686             :                 } else {
    5687           0 :                         cache->last_byte_to_unpin = caching_ctl->progress;
    5688             :                 }
    5689             :         }
    5690             : 
                               /* switch pinned_extents to the other freed_extents tree */
    5691        2098 :         if (fs_info->pinned_extents == &fs_info->freed_extents[0])
    5692        1110 :                 fs_info->pinned_extents = &fs_info->freed_extents[1];
    5693             :         else
    5694         988 :                 fs_info->pinned_extents = &fs_info->freed_extents[0];
    5695             : 
    5696        2098 :         up_write(&fs_info->commit_root_sem);
    5697             : 
    5698        2098 :         update_global_block_rsv(fs_info);
    5699        2098 : }
    5700             : 
                       /*
                        * unpin_extent_range - undo the pinned accounting for [start, end]
                        * @root:  root used to reach fs_info
                        * @start: first byte of the range
                        * @end:   last byte of the range (inclusive)
                        *
                        * Walks the range piece by piece.  Each piece lies within one block group
                        * and, when it starts below the group's last_byte_to_unpin, does not extend
                        * past it.  Such a piece is added back to the group's free space.  Every
                        * piece is subtracted from the pinned counters.  In a read-only group the
                        * bytes are added to bytes_readonly; otherwise, if the global block reserve
                        * uses the same space_info and is not full, the reserve is refilled.
                        *
                        * A missing block group hits BUG_ON.  Always returns 0.
                        */
    5701       32303 : static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
    5702             : {
    5703       32303 :         struct btrfs_fs_info *fs_info = root->fs_info;
    5704       32305 :         struct btrfs_block_group_cache *cache = NULL;
    5705             :         struct btrfs_space_info *space_info;
    5706             :         struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
    5707             :         u64 len;
    5708             :         bool readonly;
    5709             : 
    5710       64608 :         while (start <= end) {
    5711             :                 readonly = false;
                                       /* look up a group on first use or once start has left the current one */
    5712       32307 :                 if (!cache ||
    5713           2 :                     start >= cache->key.objectid + cache->key.offset) {
    5714       32305 :                         if (cache)
    5715           2 :                                 btrfs_put_block_group(cache);
    5716             :                         cache = btrfs_lookup_block_group(fs_info, start);
    5717       32305 :                         BUG_ON(!cache); /* Logic error */
    5718             :                 }
    5719             : 
                                       /* bytes from start to the end of this group, capped at end */
    5720       32305 :                 len = cache->key.objectid + cache->key.offset - start;
    5721       32305 :                 len = min(len, end + 1 - start);
    5722             : 
                                       /* only bytes below last_byte_to_unpin are returned as free space */
    5723       32305 :                 if (start < cache->last_byte_to_unpin) {
    5724       32305 :                         len = min(len, cache->last_byte_to_unpin - start);
    5725             :                         btrfs_add_free_space(cache, start, len);
    5726             :                 }
    5727             : 
                                       /* start moves on now; len still holds this piece's size */
    5728       32305 :                 start += len;
    5729       32305 :                 space_info = cache->space_info;
    5730             : 
    5731             :                 spin_lock(&space_info->lock);
    5732             :                 spin_lock(&cache->lock);
    5733       32305 :                 cache->pinned -= len;
    5734       32305 :                 space_info->bytes_pinned -= len;
    5735       32305 :                 percpu_counter_add(&space_info->total_bytes_pinned, -len);
    5736       32305 :                 if (cache->ro) {
    5737         242 :                         space_info->bytes_readonly += len;
    5738             :                         readonly = true;
    5739             :                 }
    5740             :                 spin_unlock(&cache->lock);
                                       /* cache->lock is released; space_info->lock is still held */
    5741       32305 :                 if (!readonly && global_rsv->space_info == space_info) {
    5742             :                         spin_lock(&global_rsv->lock);
    5743       23374 :                         if (!global_rsv->full) {
                                                       /* shrinking len here only limits the refill amount */
    5744         204 :                                 len = min(len, global_rsv->size -
    5745             :                                           global_rsv->reserved);
    5746         204 :                                 global_rsv->reserved += len;
    5747         204 :                                 space_info->bytes_may_use += len;
    5748         204 :                                 if (global_rsv->reserved >= global_rsv->size)
    5749          43 :                                         global_rsv->full = 1;
    5750             :                         }
    5751             :                         spin_unlock(&global_rsv->lock);
    5752             :                 }
    5753             :                 spin_unlock(&space_info->lock);
    5754             :         }
    5755             : 
                               /* drop the reference to the last block group looked up, if any */
    5756       32303 :         if (cache)
    5757       32303 :                 btrfs_put_block_group(cache);
    5758       32303 :         return 0;
    5759             : }
    5760             : 
                       /*
                        * btrfs_finish_extent_commit - unpin every range recorded in the idle tree
                        * @trans: transaction handle; nothing is done when trans->aborted is set
                        * @root:  root used to reach fs_info and the mount options
                        *
                        * Operates on the freed_extents tree that fs_info->pinned_extents does not
                        * currently point at.  Repeatedly takes the first EXTENT_DIRTY range in
                        * that tree, optionally discards it (DISCARD mount option), clears the
                        * dirty bits and calls unpin_extent_range() on it.
                        *
                        * Always returns 0.
                        */
    5761        2098 : int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
    5762       32303 :                                struct btrfs_root *root)
    5763             : {
    5764        2098 :         struct btrfs_fs_info *fs_info = root->fs_info;
    5765             :         struct extent_io_tree *unpin;
    5766             :         u64 start;
    5767             :         u64 end;
    5768             :         int ret;
    5769             : 
    5770        2098 :         if (trans->aborted)
    5771             :                 return 0;
    5772             : 
                               /* pick the tree that pinned_extents is NOT pointing at */
    5773        2098 :         if (fs_info->pinned_extents == &fs_info->freed_extents[0])
    5774         988 :                 unpin = &fs_info->freed_extents[1];
    5775             :         else
    5776             :                 unpin = &fs_info->freed_extents[0];
    5777             : 
    5778             :         while (1) {
                                       /* search always restarts at 0; handled ranges are cleared below */
    5779       34401 :                 ret = find_first_extent_bit(unpin, 0, &start, &end,
    5780             :                                             EXTENT_DIRTY, NULL);
    5781       34401 :                 if (ret)
    5782             :                         break;
    5783             : 
                                       /* the discard result is stored in ret but never checked */
    5784       32303 :                 if (btrfs_test_opt(root, DISCARD))
    5785           0 :                         ret = btrfs_discard_extent(root, start,
    5786           0 :                                                    end + 1 - start, NULL);
    5787             : 
    5788       32303 :                 clear_extent_dirty(unpin, start, end, GFP_NOFS);
    5789       64606 :                 unpin_extent_range(root, start, end);
    5790       32303 :                 cond_resched();
    5791       32303 :         }
    5792             : 
    5793             :         return 0;
    5794             : }
    5795             : 
                       /*
                        * add_pinned_bytes - add to a space_info's total_bytes_pinned counter
                        * @fs_info:       filesystem whose space_info list is searched
                        * @num_bytes:     amount added to the per-cpu counter
                        * @owner:         values below BTRFS_FIRST_FREE_OBJECTID select a
                        *                 non-data space_info, anything else selects DATA
                        * @root_objectid: for non-data owners, BTRFS_CHUNK_TREE_OBJECTID selects
                        *                 SYSTEM, anything else selects METADATA
                        *
                        * A missing space_info for the chosen flags hits BUG_ON.
                        */
    5796       86060 : static void add_pinned_bytes(struct btrfs_fs_info *fs_info, u64 num_bytes,
    5797             :                              u64 owner, u64 root_objectid)
    5798             : {
    5799             :         struct btrfs_space_info *space_info;
    5800             :         u64 flags;
    5801             : 
    5802       86060 :         if (owner < BTRFS_FIRST_FREE_OBJECTID) {
    5803       53405 :                 if (root_objectid == BTRFS_CHUNK_TREE_OBJECTID)
    5804             :                         flags = BTRFS_BLOCK_GROUP_SYSTEM;
    5805             :                 else
    5806             :                         flags = BTRFS_BLOCK_GROUP_METADATA;
    5807             :         } else {
    5808             :                 flags = BTRFS_BLOCK_GROUP_DATA;
    5809             :         }
    5810             : 
    5811             :         space_info = __find_space_info(fs_info, flags);
    5812       86062 :         BUG_ON(!space_info); /* Logic bug */
    5813       86062 :         percpu_counter_add(&space_info->total_bytes_pinned, num_bytes);
    5814       86062 : }
    5815             : 
    5816             : 
    5817       71066 : static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
    5818             :                                 struct btrfs_root *root,
    5819             :                                 u64 bytenr, u64 num_bytes, u64 parent,
    5820             :                                 u64 root_objectid, u64 owner_objectid,
    5821             :                                 u64 owner_offset, int refs_to_drop,
    5822             :                                 struct btrfs_delayed_extent_op *extent_op,
    5823             :                                 int no_quota)
    5824             : {
    5825             :         struct btrfs_key key;
    5826       15648 :         struct btrfs_path *path;
    5827       71066 :         struct btrfs_fs_info *info = root->fs_info;
    5828       71066 :         struct btrfs_root *extent_root = info->extent_root;
    5829             :         struct extent_buffer *leaf;
    5830             :         struct btrfs_extent_item *ei;
    5831             :         struct btrfs_extent_inline_ref *iref;
    5832             :         int ret;
    5833             :         int is_data;
    5834             :         int extent_slot = 0;
    5835             :         int found_extent = 0;
    5836             :         int num_to_del = 1;
    5837             :         u32 item_size;
    5838             :         u64 refs;
    5839       71066 :         int last_ref = 0;
    5840             :         enum btrfs_qgroup_operation_type type = BTRFS_QGROUP_OPER_SUB_EXCL;
    5841       71066 :         bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
    5842             :                                                  SKINNY_METADATA);
    5843             : 
    5844       78534 :         if (!info->quota_enabled || !is_fstree(root_objectid))
    5845             :                 no_quota = 1;
    5846             : 
    5847       71066 :         path = btrfs_alloc_path();
    5848       71063 :         if (!path)
    5849             :                 return -ENOMEM;
    5850             : 
    5851       71063 :         path->reada = 1;
    5852       71063 :         path->leave_spinning = 1;
    5853             : 
    5854       71063 :         is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
    5855       71063 :         BUG_ON(!is_data && refs_to_drop != 1);
    5856             : 
    5857       71063 :         if (is_data)
    5858             :                 skinny_metadata = 0;
    5859             : 
    5860       71063 :         ret = lookup_extent_backref(trans, extent_root, path, &iref,
    5861             :                                     bytenr, num_bytes, parent,
    5862             :                                     root_objectid, owner_objectid,
    5863             :                                     owner_offset);
    5864       71066 :         if (ret == 0) {
    5865       71066 :                 extent_slot = path->slots[0];
    5866      142132 :                 while (extent_slot >= 0) {
    5867       71066 :                         btrfs_item_key_to_cpu(path->nodes[0], &key,
    5868             :                                               extent_slot);
    5869       71066 :                         if (key.objectid != bytenr)
    5870             :                                 break;
    5871      142132 :                         if (key.type == BTRFS_EXTENT_ITEM_KEY &&
    5872       71066 :                             key.offset == num_bytes) {
    5873             :                                 found_extent = 1;
    5874             :                                 break;
    5875             :                         }
    5876           0 :                         if (key.type == BTRFS_METADATA_ITEM_KEY &&
    5877           0 :                             key.offset == owner_objectid) {
    5878             :                                 found_extent = 1;
    5879             :                                 break;
    5880             :                         }
    5881           0 :                         if (path->slots[0] - extent_slot > 5)
    5882             :                                 break;
    5883           0 :                         extent_slot--;
    5884             :                 }
    5885             : #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
    5886       71066 :                 item_size = btrfs_item_size_nr(path->nodes[0], extent_slot);
    5887       71066 :                 if (found_extent && item_size < sizeof(*ei))
    5888             :                         found_extent = 0;
    5889             : #endif
    5890       71066 :                 if (!found_extent) {
    5891           0 :                         BUG_ON(iref);
    5892           0 :                         ret = remove_extent_backref(trans, extent_root, path,
    5893             :                                                     NULL, refs_to_drop,
    5894             :                                                     is_data, &last_ref);
    5895           0 :                         if (ret) {
    5896           0 :                                 btrfs_abort_transaction(trans, extent_root, ret);
    5897           0 :                                 goto out;
    5898             :                         }
    5899           0 :                         btrfs_release_path(path);
    5900           0 :                         path->leave_spinning = 1;
    5901             : 
    5902           0 :                         key.objectid = bytenr;
    5903           0 :                         key.type = BTRFS_EXTENT_ITEM_KEY;
    5904           0 :                         key.offset = num_bytes;
    5905             : 
    5906           0 :                         if (!is_data && skinny_metadata) {
    5907           0 :                                 key.type = BTRFS_METADATA_ITEM_KEY;
    5908           0 :                                 key.offset = owner_objectid;
    5909             :                         }
    5910             : 
    5911           0 :                         ret = btrfs_search_slot(trans, extent_root,
    5912             :                                                 &key, path, -1, 1);
    5913           0 :                         if (ret > 0 && skinny_metadata && path->slots[0]) {
    5914             :                                 /*
    5915             :                                  * Couldn't find our skinny metadata item,
    5916             :                                  * see if we have ye olde extent item.
    5917             :                                  */
    5918           0 :                                 path->slots[0]--;
    5919           0 :                                 btrfs_item_key_to_cpu(path->nodes[0], &key,
    5920             :                                                       path->slots[0]);
    5921           0 :                                 if (key.objectid == bytenr &&
    5922           0 :                                     key.type == BTRFS_EXTENT_ITEM_KEY &&
    5923           0 :                                     key.offset == num_bytes)
    5924             :                                         ret = 0;
    5925             :                         }
    5926             : 
    5927           0 :                         if (ret > 0 && skinny_metadata) {
    5928             :                                 skinny_metadata = false;
    5929           0 :                                 key.objectid = bytenr;
    5930           0 :                                 key.type = BTRFS_EXTENT_ITEM_KEY;
    5931           0 :                                 key.offset = num_bytes;
    5932           0 :                                 btrfs_release_path(path);
    5933           0 :                                 ret = btrfs_search_slot(trans, extent_root,
    5934             :                                                         &key, path, -1, 1);
    5935             :                         }
    5936             : 
    5937           0 :                         if (ret) {
    5938           0 :                                 btrfs_err(info, "umm, got %d back from search, was looking for %llu",
    5939             :                                         ret, bytenr);
    5940           0 :                                 if (ret > 0)
    5941           0 :                                         btrfs_print_leaf(extent_root,
    5942             :                                                          path->nodes[0]);
    5943             :                         }
    5944           0 :                         if (ret < 0) {
    5945           0 :                                 btrfs_abort_transaction(trans, extent_root, ret);
    5946           0 :                                 goto out;
    5947             :                         }
    5948           0 :                         extent_slot = path->slots[0];
    5949             :                 }
    5950           0 :         } else if (WARN_ON(ret == -ENOENT)) {
    5951           0 :                 btrfs_print_leaf(extent_root, path->nodes[0]);
    5952           0 :                 btrfs_err(info,
    5953             :                         "unable to find ref byte nr %llu parent %llu root %llu  owner %llu offset %llu",
    5954             :                         bytenr, parent, root_objectid, owner_objectid,
    5955             :                         owner_offset);
    5956           0 :                 btrfs_abort_transaction(trans, extent_root, ret);
    5957           0 :                 goto out;
    5958             :         } else {
    5959           0 :                 btrfs_abort_transaction(trans, extent_root, ret);
    5960           0 :                 goto out;
    5961             :         }
    5962             : 
    5963       71066 :         leaf = path->nodes[0];
    5964             :         item_size = btrfs_item_size_nr(leaf, extent_slot);
    5965             : #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
    5966       71066 :         if (item_size < sizeof(*ei)) {
    5967           0 :                 BUG_ON(found_extent || extent_slot != path->slots[0]);
    5968           0 :                 ret = convert_extent_item_v0(trans, extent_root, path,
    5969             :                                              owner_objectid, 0);
    5970           0 :                 if (ret < 0) {
    5971           0 :                         btrfs_abort_transaction(trans, extent_root, ret);
    5972           0 :                         goto out;
    5973             :                 }
    5974             : 
    5975           0 :                 btrfs_release_path(path);
    5976           0 :                 path->leave_spinning = 1;
    5977             : 
    5978           0 :                 key.objectid = bytenr;
    5979           0 :                 key.type = BTRFS_EXTENT_ITEM_KEY;
    5980           0 :                 key.offset = num_bytes;
    5981             : 
    5982           0 :                 ret = btrfs_search_slot(trans, extent_root, &key, path,
    5983             :                                         -1, 1);
    5984           0 :                 if (ret) {
    5985           0 :                         btrfs_err(info, "umm, got %d back from search, was looking for %llu",
    5986             :                                 ret, bytenr);
    5987           0 :                         btrfs_print_leaf(extent_root, path->nodes[0]);
    5988             :                 }
    5989           0 :                 if (ret < 0) {
    5990           0 :                         btrfs_abort_transaction(trans, extent_root, ret);
    5991           0 :                         goto out;
    5992             :                 }
    5993             : 
    5994           0 :                 extent_slot = path->slots[0];
    5995           0 :                 leaf = path->nodes[0];
    5996             :                 item_size = btrfs_item_size_nr(leaf, extent_slot);
    5997             :         }
    5998             : #endif
    5999       71066 :         BUG_ON(item_size < sizeof(*ei));
    6000       71065 :         ei = btrfs_item_ptr(leaf, extent_slot,
    6001             :                             struct btrfs_extent_item);
    6002      120350 :         if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
    6003       49285 :             key.type == BTRFS_EXTENT_ITEM_KEY) {
    6004             :                 struct btrfs_tree_block_info *bi;
    6005       49285 :                 BUG_ON(item_size < sizeof(*ei) + sizeof(*bi));
    6006       49285 :                 bi = (struct btrfs_tree_block_info *)(ei + 1);
    6007       49285 :                 WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
    6008             :         }
    6009             : 
    6010             :         refs = btrfs_extent_refs(leaf, ei);
    6011       71065 :         if (refs < refs_to_drop) {
    6012           0 :                 btrfs_err(info, "trying to drop %d refs but we only have %Lu "
    6013             :                           "for bytenr %Lu", refs_to_drop, refs, bytenr);
    6014             :                 ret = -EINVAL;
    6015           0 :                 btrfs_abort_transaction(trans, extent_root, ret);
    6016           0 :                 goto out;
    6017             :         }
    6018       71065 :         refs -= refs_to_drop;
    6019             : 
    6020       71065 :         if (refs > 0) {
    6021             :                 type = BTRFS_QGROUP_OPER_SUB_SHARED;
    6022        6795 :                 if (extent_op)
    6023         298 :                         __run_delayed_extent_op(extent_op, leaf, ei);
    6024             :                 /*
    6025             :                  * In the case of inline back ref, reference count will
    6026             :                  * be updated by remove_extent_backref
    6027             :                  */
    6028        6795 :                 if (iref) {
    6029        6795 :                         BUG_ON(!found_extent);
    6030             :                 } else {
    6031             :                         btrfs_set_extent_refs(leaf, ei, refs);
    6032           0 :                         btrfs_mark_buffer_dirty(leaf);
    6033             :                 }
    6034        6795 :                 if (found_extent) {
    6035        6795 :                         ret = remove_extent_backref(trans, extent_root, path,
    6036             :                                                     iref, refs_to_drop,
    6037             :                                                     is_data, &last_ref);
    6038        6795 :                         if (ret) {
    6039           0 :                                 btrfs_abort_transaction(trans, extent_root, ret);
    6040           0 :                                 goto out;
    6041             :                         }
    6042             :                 }
    6043        6795 :                 add_pinned_bytes(root->fs_info, -num_bytes, owner_objectid,
    6044             :                                  root_objectid);
    6045             :         } else {
    6046       64270 :                 if (found_extent) {
    6047       79919 :                         BUG_ON(is_data && refs_to_drop !=
    6048             :                                extent_data_ref_count(root, path, iref));
    6049       64271 :                         if (iref) {
    6050       64271 :                                 BUG_ON(path->slots[0] != extent_slot);
    6051             :                         } else {
    6052           0 :                                 BUG_ON(path->slots[0] != extent_slot + 1);
    6053           0 :                                 path->slots[0] = extent_slot;
    6054             :                                 num_to_del = 2;
    6055             :                         }
    6056             :                 }
    6057             : 
    6058       64270 :                 last_ref = 1;
    6059       64270 :                 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
    6060             :                                       num_to_del);
    6061       64271 :                 if (ret) {
    6062           0 :                         btrfs_abort_transaction(trans, extent_root, ret);
    6063           0 :                         goto out;
    6064             :                 }
    6065       64271 :                 btrfs_release_path(path);
    6066             : 
    6067       64271 :                 if (is_data) {
    6068       15648 :                         ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
    6069       15648 :                         if (ret) {
    6070           0 :                                 btrfs_abort_transaction(trans, extent_root, ret);
    6071           0 :                                 goto out;
    6072             :                         }
    6073             :                 }
    6074             : 
    6075       64271 :                 ret = update_block_group(root, bytenr, num_bytes, 0);
    6076       64271 :                 if (ret) {
    6077           0 :                         btrfs_abort_transaction(trans, extent_root, ret);
    6078           0 :                         goto out;
    6079             :                 }
    6080             :         }
    6081       71066 :         btrfs_release_path(path);
    6082             : 
    6083             :         /* Deal with the quota accounting */
    6084       71066 :         if (!ret && last_ref && !no_quota) {
    6085             :                 int mod_seq = 0;
    6086             : 
    6087        5568 :                 if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID &&
    6088        5568 :                     type == BTRFS_QGROUP_OPER_SUB_SHARED)
    6089             :                         mod_seq = 1;
    6090             : 
    6091        5568 :                 ret = btrfs_qgroup_record_ref(trans, info, root_objectid,
    6092             :                                               bytenr, num_bytes, type,
    6093             :                                               mod_seq);
    6094             :         }
    6095             : out:
    6096       71066 :         btrfs_free_path(path);
    6097       71066 :         return ret;
    6098             : }
    6099             : 
    6100             : /*
    6101             :  * when we free an block, it is possible (and likely) that we free the last
    6102             :  * delayed ref for that extent as well.  This searches the delayed ref tree for
    6103             :  * a given extent, and if there are no other delayed refs to be processed, it
    6104             :  * removes it from the tree.
    6105             :  */
    6106       11390 : static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
    6107             :                                       struct btrfs_root *root, u64 bytenr)
    6108             : {
    6109             :         struct btrfs_delayed_ref_head *head;
    6110             :         struct btrfs_delayed_ref_root *delayed_refs;
    6111             :         int ret = 0;
    6112             : 
    6113       11390 :         delayed_refs = &trans->transaction->delayed_refs;
    6114             :         spin_lock(&delayed_refs->lock);
    6115       11390 :         head = btrfs_find_delayed_ref_head(trans, bytenr);
    6116       11390 :         if (!head)
    6117             :                 goto out_delayed_unlock;
    6118             : 
    6119             :         spin_lock(&head->lock);
    6120       11390 :         if (rb_first(&head->ref_root))
    6121             :                 goto out;
    6122             : 
    6123        2171 :         if (head->extent_op) {
    6124        2170 :                 if (!head->must_insert_reserved)
    6125             :                         goto out;
    6126             :                 btrfs_free_delayed_extent_op(head->extent_op);
    6127        2170 :                 head->extent_op = NULL;
    6128             :         }
    6129             : 
    6130             :         /*
    6131             :          * waiting for the lock here would deadlock.  If someone else has it
    6132             :          * locked they are already in the process of dropping it anyway
    6133             :          */
    6134        2171 :         if (!mutex_trylock(&head->mutex))
    6135             :                 goto out;
    6136             : 
    6137             :         /*
    6138             :          * at this point we have a head with no other entries.  Go
    6139             :          * ahead and process it.
    6140             :          */
    6141        2170 :         head->node.in_tree = 0;
    6142        2170 :         rb_erase(&head->href_node, &delayed_refs->href_root);
    6143             : 
    6144        2170 :         atomic_dec(&delayed_refs->num_entries);
    6145             : 
    6146             :         /*
    6147             :          * we don't take a ref on the node because we're removing it from the
    6148             :          * tree, so we just steal the ref the tree was holding.
    6149             :          */
    6150        2170 :         delayed_refs->num_heads--;
    6151        2170 :         if (head->processing == 0)
    6152        2170 :                 delayed_refs->num_heads_ready--;
    6153        2170 :         head->processing = 0;
    6154             :         spin_unlock(&head->lock);
    6155             :         spin_unlock(&delayed_refs->lock);
    6156             : 
    6157        2170 :         BUG_ON(head->extent_op);
    6158        2170 :         if (head->must_insert_reserved)
    6159             :                 ret = 1;
    6160             : 
    6161        2170 :         mutex_unlock(&head->mutex);
    6162        2170 :         btrfs_put_delayed_ref(&head->node);
    6163             :         return ret;
    6164             : out:
    6165             :         spin_unlock(&head->lock);
    6166             : 
    6167             : out_delayed_unlock:
    6168             :         spin_unlock(&delayed_refs->lock);
    6169             :         return 0;
    6170             : }
    6171             : 
    6172       54155 : void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
    6173             :                            struct btrfs_root *root,
    6174      157758 :                            struct extent_buffer *buf,
    6175             :                            u64 parent, int last_ref)
    6176             : {
    6177        1180 :         struct btrfs_block_group_cache *cache = NULL;
    6178             :         int pin = 1;
    6179             :         int ret;
    6180             : 
    6181       54155 :         if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
    6182      103138 :                 ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
    6183       51569 :                                         buf->start, buf->len,
    6184             :                                         parent, root->root_key.objectid,
    6185             :                                         btrfs_header_level(buf),
    6186             :                                         BTRFS_DROP_DELAYED_REF, NULL, 0);
    6187       51570 :                 BUG_ON(ret); /* -ENOMEM */
    6188             :         }
    6189             : 
    6190       54156 :         if (!last_ref)
    6191       54156 :                 return;
    6192             : 
    6193       53685 :         cache = btrfs_lookup_block_group(root->fs_info, buf->start);
    6194             : 
    6195       53685 :         if (btrfs_header_generation(buf) == trans->transid) {
    6196       13975 :                 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
    6197       11390 :                         ret = check_ref_cleanup(trans, root, buf->start);
    6198       11390 :                         if (!ret)
    6199             :                                 goto out;
    6200             :                 }
    6201             : 
    6202        4755 :                 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
    6203        3575 :                         pin_down_extent(root, cache, buf->start, buf->len, 1);
    6204        3575 :                         goto out;
    6205             :                 }
    6206             : 
    6207        1180 :                 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
    6208             : 
    6209        1180 :                 btrfs_add_free_space(cache, buf->start, buf->len);
    6210        1180 :                 btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE, 0);
    6211        1180 :                 trace_btrfs_reserved_extent_free(root, buf->start, buf->len);
    6212             :                 pin = 0;
    6213             :         }
    6214             : out:
    6215       53684 :         if (pin)
    6216      105008 :                 add_pinned_bytes(root->fs_info, buf->len,
    6217             :                                  btrfs_header_level(buf),
    6218             :                                  root->root_key.objectid);
    6219             : 
    6220             :         /*
    6221             :          * Deleting the buffer, clear the corrupt flag since it doesn't matter
    6222             :          * anymore.
    6223             :          */
    6224             :         clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
    6225       53685 :         btrfs_put_block_group(cache);
    6226             : }
    6227             : 
    6228             : /* Can return -ENOMEM */
    6229       26762 : int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root,
    6230             :                       u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
    6231             :                       u64 owner, u64 offset, int no_quota)
    6232             : {
    6233             :         int ret;
    6234       26762 :         struct btrfs_fs_info *fs_info = root->fs_info;
    6235             : 
    6236             : #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
    6237             :         if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state)))
    6238             :                 return 0;
    6239             : #endif
    6240       26762 :         add_pinned_bytes(root->fs_info, num_bytes, owner, root_objectid);
    6241             : 
    6242             :         /*
    6243             :          * tree log blocks never actually go into the extent allocation
    6244             :          * tree, just update pinning info and exit early.
    6245             :          */
    6246       26763 :         if (root_objectid == BTRFS_TREE_LOG_OBJECTID) {
    6247           0 :                 WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID);
    6248             :                 /* unlocks the pinned mutex */
    6249           0 :                 btrfs_pin_extent(root, bytenr, num_bytes, 1);
    6250             :                 ret = 0;
    6251       26763 :         } else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
    6252         238 :                 ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
    6253             :                                         num_bytes,
    6254             :                                         parent, root_objectid, (int)owner,
    6255             :                                         BTRFS_DROP_DELAYED_REF, NULL, no_quota);
    6256             :         } else {
    6257       26525 :                 ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
    6258             :                                                 num_bytes,
    6259             :                                                 parent, root_objectid, owner,
    6260             :                                                 offset, BTRFS_DROP_DELAYED_REF,
    6261             :                                                 NULL, no_quota);
    6262             :         }
    6263       26763 :         return ret;
    6264             : }
    6265             : 
    6266             : static u64 stripe_align(struct btrfs_root *root,
    6267             :                         struct btrfs_block_group_cache *cache,
    6268             :                         u64 val, u64 num_bytes)
    6269             : {
    6270      112265 :         u64 ret = ALIGN(val, root->stripesize);
    6271             :         return ret;
    6272             : }
    6273             : 
    6274             : /*
    6275             :  * when we wait for progress in the block group caching, its because
    6276             :  * our allocation attempt failed at least once.  So, we must sleep
    6277             :  * and let some progress happen before we try again.
    6278             :  *
    6279             :  * This function will sleep at least once waiting for new free space to
    6280             :  * show up, and then it will check the block group free space numbers
    6281             :  * for our min num_bytes.  Another option is to have it go ahead
    6282             :  * and look in the rbtree for a free extent of a given size, but this
    6283             :  * is a good start.
    6284             :  *
    6285             :  * Callers of this must check if cache->cached == BTRFS_CACHE_ERROR before using
    6286             :  * any of the information in this block group.
    6287             :  */
    6288             : static noinline void
    6289         128 : wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
    6290             :                                 u64 num_bytes)
    6291             : {
    6292             :         struct btrfs_caching_control *caching_ctl;
    6293             : 
    6294         128 :         caching_ctl = get_caching_control(cache);
    6295         128 :         if (!caching_ctl)
    6296         128 :                 return;
    6297             : 
    6298         256 :         wait_event(caching_ctl->wait, block_group_cache_done(cache) ||
    6299             :                    (cache->free_space_ctl->free_space >= num_bytes));
    6300             : 
    6301         128 :         put_caching_control(caching_ctl);
    6302             : }
    6303             : 
    6304             : static noinline int
    6305           0 : wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
    6306             : {
    6307             :         struct btrfs_caching_control *caching_ctl;
    6308             :         int ret = 0;
    6309             : 
    6310           0 :         caching_ctl = get_caching_control(cache);
    6311           0 :         if (!caching_ctl)
    6312           0 :                 return (cache->cached == BTRFS_CACHE_ERROR) ? -EIO : 0;
    6313             : 
    6314           0 :         wait_event(caching_ctl->wait, block_group_cache_done(cache));
    6315           0 :         if (cache->cached == BTRFS_CACHE_ERROR)
    6316             :                 ret = -EIO;
    6317           0 :         put_caching_control(caching_ctl);
    6318           0 :         return ret;
    6319             : }
    6320             : 
    6321      213561 : int __get_raid_index(u64 flags)
    6322             : {
    6323      213561 :         if (flags & BTRFS_BLOCK_GROUP_RAID10)
    6324             :                 return BTRFS_RAID_RAID10;
    6325      213546 :         else if (flags & BTRFS_BLOCK_GROUP_RAID1)
    6326             :                 return BTRFS_RAID_RAID1;
    6327      208494 :         else if (flags & BTRFS_BLOCK_GROUP_DUP)
    6328             :                 return BTRFS_RAID_DUP;
    6329      126488 :         else if (flags & BTRFS_BLOCK_GROUP_RAID0)
    6330             :                 return BTRFS_RAID_RAID0;
    6331      124960 :         else if (flags & BTRFS_BLOCK_GROUP_RAID5)
    6332             :                 return BTRFS_RAID_RAID5;
    6333      124939 :         else if (flags & BTRFS_BLOCK_GROUP_RAID6)
    6334             :                 return BTRFS_RAID_RAID6;
    6335             : 
    6336      124920 :         return BTRFS_RAID_SINGLE; /* BTRFS_BLOCK_GROUP_SINGLE */
    6337             : }
    6338             : 
    6339           0 : int get_block_group_index(struct btrfs_block_group_cache *cache)
    6340             : {
    6341      101250 :         return __get_raid_index(cache->flags);
    6342             : }
    6343             : 
    6344             : static const char *btrfs_raid_type_names[BTRFS_NR_RAID_TYPES] = {
    6345             :         [BTRFS_RAID_RAID10]     = "raid10",
    6346             :         [BTRFS_RAID_RAID1]      = "raid1",
    6347             :         [BTRFS_RAID_DUP]        = "dup",
    6348             :         [BTRFS_RAID_RAID0]      = "raid0",
    6349             :         [BTRFS_RAID_SINGLE]     = "single",
    6350             :         [BTRFS_RAID_RAID5]      = "raid5",
    6351             :         [BTRFS_RAID_RAID6]      = "raid6",
    6352             : };
    6353             : 
    6354             : static const char *get_raid_name(enum btrfs_raid_types type)
    6355             : {
    6356        1090 :         if (type >= BTRFS_NR_RAID_TYPES)
    6357             :                 return NULL;
    6358             : 
    6359        1090 :         return btrfs_raid_type_names[type];
    6360             : }
    6361             : 
    6362             : enum btrfs_loop_type {
    6363             :         LOOP_CACHING_NOWAIT = 0,
    6364             :         LOOP_CACHING_WAIT = 1,
    6365             :         LOOP_ALLOC_CHUNK = 2,
    6366             :         LOOP_NO_EMPTY_SIZE = 3,
    6367             : };
    6368             : 
    6369             : static inline void
    6370             : btrfs_lock_block_group(struct btrfs_block_group_cache *cache,
    6371             :                        int delalloc)
    6372             : {
    6373       56904 :         if (delalloc)
    6374       11998 :                 down_read(&cache->data_rwsem);
    6375             : }
    6376             : 
    6377             : static inline void
    6378       98349 : btrfs_grab_block_group(struct btrfs_block_group_cache *cache,
    6379             :                        int delalloc)
    6380             : {
    6381             :         btrfs_get_block_group(cache);
    6382       98349 :         if (delalloc)
    6383       58480 :                 down_read(&cache->data_rwsem);
    6384       98349 : }
    6385             : 
    6386             : static struct btrfs_block_group_cache *
    6387       40871 : btrfs_lock_cluster(struct btrfs_block_group_cache *block_group,
    6388             :                    struct btrfs_free_cluster *cluster,
    6389             :                    int delalloc)
    6390             : {
    6391             :         struct btrfs_block_group_cache *used_bg;
    6392             :         bool locked = false;
    6393             : again:
    6394             :         spin_lock(&cluster->refill_lock);
    6395       40871 :         if (locked) {
    6396           0 :                 if (used_bg == cluster->block_group)
    6397             :                         return used_bg;
    6398             : 
    6399           0 :                 up_read(&used_bg->data_rwsem);
    6400           0 :                 btrfs_put_block_group(used_bg);
    6401             :         }
    6402             : 
    6403       40871 :         used_bg = cluster->block_group;
    6404       40871 :         if (!used_bg)
    6405             :                 return NULL;
    6406             : 
    6407       40506 :         if (used_bg == block_group)
    6408             :                 return used_bg;
    6409             : 
    6410             :         btrfs_get_block_group(used_bg);
    6411             : 
    6412          27 :         if (!delalloc)
    6413             :                 return used_bg;
    6414             : 
    6415           0 :         if (down_read_trylock(&used_bg->data_rwsem))
    6416             :                 return used_bg;
    6417             : 
    6418             :         spin_unlock(&cluster->refill_lock);
    6419           0 :         down_read(&used_bg->data_rwsem);
    6420             :         locked = true;
    6421           0 :         goto again;
    6422             : }
    6423             : 
    6424             : static inline void
    6425             : btrfs_release_block_group(struct btrfs_block_group_cache *cache,
    6426             :                          int delalloc)
    6427             : {
    6428      155274 :         if (delalloc)
    6429       70476 :                 up_read(&cache->data_rwsem);
    6430      155275 :         btrfs_put_block_group(cache);
    6431             : }
    6432             : 
    6433             : /*
    6434             :  * walks the btree of allocated extents and find a hole of a given size.
    6435             :  * The key ins is changed to record the hole:
    6436             :  * ins->objectid == start position
    6437             :  * ins->flags = BTRFS_EXTENT_ITEM_KEY
    6438             :  * ins->offset == the size of the hole.
    6439             :  * Any available blocks before search_start are skipped.
    6440             :  *
    6441             :  * If there is no suitable free space, we will record the max size of
    6442             :  * the free space extent currently.
    6443             :  */
    6444      112255 : static noinline int find_free_extent(struct btrfs_root *orig_root,
    6445             :                                      u64 num_bytes, u64 empty_size,
    6446             :                                      u64 hint_byte, struct btrfs_key *ins,
    6447             :                                      u64 flags, int delalloc)
    6448             : {
    6449             :         int ret = 0;
    6450      224520 :         struct btrfs_root *root = orig_root->fs_info->extent_root;
    6451             :         struct btrfs_free_cluster *last_ptr = NULL;
    6452      209820 :         struct btrfs_block_group_cache *block_group = NULL;
    6453             :         u64 search_start = 0;
    6454      112255 :         u64 max_extent_size = 0;
    6455             :         int empty_cluster = 2 * 1024 * 1024;
    6456             :         struct btrfs_space_info *space_info;
    6457             :         int loop = 0;
    6458      112255 :         int index = __get_raid_index(flags);
    6459      112255 :         int alloc_type = (flags & BTRFS_BLOCK_GROUP_DATA) ?
    6460      112255 :                 RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC;
    6461             :         bool failed_cluster_refill = false;
    6462             :         bool failed_alloc = false;
    6463             :         bool use_cluster = true;
    6464             :         bool have_caching_bg = false;
    6465             : 
    6466      112255 :         WARN_ON(num_bytes < root->sectorsize);
    6467             :         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
    6468      112255 :         ins->objectid = 0;
    6469      112255 :         ins->offset = 0;
    6470             : 
    6471      112255 :         trace_find_free_extent(orig_root, num_bytes, empty_size, flags);
    6472             : 
    6473      112258 :         space_info = __find_space_info(root->fs_info, flags);
    6474      112257 :         if (!space_info) {
    6475           0 :                 btrfs_err(root->fs_info, "No space info for %llu", flags);
    6476           0 :                 return -ENOSPC;
    6477             :         }
    6478             : 
    6479             :         /*
    6480             :          * If the space info is for both data and metadata it means we have a
    6481             :          * small filesystem and we can't use the clustering stuff.
    6482             :          */
    6483      112257 :         if (btrfs_mixed_space_info(space_info))
    6484             :                 use_cluster = false;
    6485             : 
    6486      112257 :         if (flags & BTRFS_BLOCK_GROUP_METADATA && use_cluster) {
    6487       40693 :                 last_ptr = &root->fs_info->meta_alloc_cluster;
    6488       40693 :                 if (!btrfs_test_opt(root, SSD))
    6489             :                         empty_cluster = 64 * 1024;
    6490             :         }
    6491             : 
    6492      162154 :         if ((flags & BTRFS_BLOCK_GROUP_DATA) && use_cluster &&
    6493       49897 :             btrfs_test_opt(root, SSD)) {
    6494           0 :                 last_ptr = &root->fs_info->data_alloc_cluster;
    6495             :         }
    6496             : 
    6497      112257 :         if (last_ptr) {
    6498             :                 spin_lock(&last_ptr->lock);
    6499       40694 :                 if (last_ptr->block_group)
    6500       40506 :                         hint_byte = last_ptr->window_start;
    6501             :                 spin_unlock(&last_ptr->lock);
    6502             :         }
    6503             : 
    6504      112258 :         search_start = max(search_start, first_logical_byte(root, 0));
    6505      112265 :         search_start = max(search_start, hint_byte);
    6506             : 
    6507      112265 :         if (!last_ptr)
    6508             :                 empty_cluster = 0;
    6509             : 
    6510      112265 :         if (search_start == hint_byte) {
    6511      111481 :                 block_group = btrfs_lookup_block_group(root->fs_info,
    6512             :                                                        search_start);
    6513             :                 /*
    6514             :                  * we don't want to use the block group if it doesn't match our
    6515             :                  * allocation bits, or if its not cached.
    6516             :                  *
    6517             :                  * However if we are re-searching with an ideal block group
    6518             :                  * picked out then we don't care that the block group is cached.
    6519             :                  */
    6520      279886 :                 if (block_group && block_group_bits(block_group, flags) &&
    6521       56929 :                     block_group->cached != BTRFS_CACHE_NO) {
    6522       56926 :                         down_read(&space_info->groups_sem);
    6523      113872 :                         if (list_empty(&block_group->list) ||
    6524             :                             block_group->ro) {
    6525             :                                 /*
    6526             :                                  * someone is removing this block group,
    6527             :                                  * we can't jump into the have_block_group
    6528             :                                  * target because our list pointers are not
    6529             :                                  * valid
    6530             :                                  */
    6531          22 :                                 btrfs_put_block_group(block_group);
    6532          22 :                                 up_read(&space_info->groups_sem);
    6533             :                         } else {
    6534             :                                 index = get_block_group_index(block_group);
    6535             :                                 btrfs_lock_block_group(block_group, delalloc);
    6536             :                                 goto have_block_group;
    6537             :                         }
    6538       54558 :                 } else if (block_group) {
    6539       54547 :                         btrfs_put_block_group(block_group);
    6540             :                 }
    6541             :         }
    6542             : search:
    6543             :         have_caching_bg = false;
    6544       56560 :         down_read(&space_info->groups_sem);
    6545       99545 :         list_for_each_entry(block_group, &space_info->block_groups[index],
    6546             :                             list) {
    6547             :                 u64 offset;
    6548             :                 int cached;
    6549             : 
    6550       98349 :                 btrfs_grab_block_group(block_group, delalloc);
    6551       98347 :                 search_start = block_group->key.objectid;
    6552             : 
    6553             :                 /*
    6554             :                  * this can happen if we end up cycling through all the
    6555             :                  * raid types, but we want to make sure we only allocate
    6556             :                  * for the proper type.
    6557             :                  */
    6558       98347 :                 if (!block_group_bits(block_group, flags)) {
    6559             :                     u64 extra = BTRFS_BLOCK_GROUP_DUP |
    6560             :                                 BTRFS_BLOCK_GROUP_RAID1 |
    6561             :                                 BTRFS_BLOCK_GROUP_RAID5 |
    6562             :                                 BTRFS_BLOCK_GROUP_RAID6 |
    6563             :                                 BTRFS_BLOCK_GROUP_RAID10;
    6564             : 
    6565             :                         /*
    6566             :                          * if they asked for extra copies and this block group
    6567             :                          * doesn't provide them, bail.  This does allow us to
    6568             :                          * fill raid0 from raid1.
    6569             :                          */
    6570         114 :                         if ((flags & extra) && !(block_group->flags & extra))
    6571             :                                 goto loop;
    6572             :                 }
    6573             : 
    6574             : have_block_group:
    6575      155270 :                 cached = block_group_cache_done(block_group);
    6576      155266 :                 if (unlikely(!cached)) {
    6577         512 :                         ret = cache_block_group(block_group, 0);
    6578         512 :                         BUG_ON(ret < 0);
    6579             :                         ret = 0;
    6580             :                 }
    6581             : 
    6582      155266 :                 if (unlikely(block_group->cached == BTRFS_CACHE_ERROR))
    6583             :                         goto loop;
    6584      155266 :                 if (unlikely(block_group->ro))
    6585             :                         goto loop;
    6586             : 
    6587             :                 /*
    6588             :                  * Ok we want to try and use the cluster allocator, so
    6589             :                  * lets look there
    6590             :                  */
    6591      155006 :                 if (last_ptr) {
    6592           5 :                         struct btrfs_block_group_cache *used_block_group;
    6593             :                         unsigned long aligned_cluster;
    6594             :                         /*
    6595             :                          * the refill lock keeps out other
    6596             :                          * people trying to start a new cluster
    6597             :                          */
    6598       40871 :                         used_block_group = btrfs_lock_cluster(block_group,
    6599             :                                                               last_ptr,
    6600             :                                                               delalloc);
    6601       40871 :                         if (!used_block_group)
    6602             :                                 goto refill_cluster;
    6603             : 
    6604       40533 :                         if (used_block_group != block_group &&
    6605          32 :                             (used_block_group->ro ||
    6606             :                              !block_group_bits(used_block_group, flags)))
    6607             :                                 goto release_cluster;
    6608             : 
    6609       40479 :                         offset = btrfs_alloc_from_cluster(used_block_group,
    6610             :                                                 last_ptr,
    6611             :                                                 num_bytes,
    6612             :                                                 used_block_group->key.objectid,
    6613             :                                                 &max_extent_size);
    6614       40479 :                         if (offset) {
    6615             :                                 /* we have a block, we're done */
    6616             :                                 spin_unlock(&last_ptr->refill_lock);
    6617       40477 :                                 trace_btrfs_reserve_extent_cluster(root,
    6618             :                                                 used_block_group,
    6619             :                                                 search_start, num_bytes);
    6620       40477 :                                 if (used_block_group != block_group) {
    6621             :                                         btrfs_release_block_group(block_group,
    6622             :                                                                   delalloc);
    6623             :                                         block_group = used_block_group;
    6624             :                                 }
    6625             :                                 goto checks;
    6626             :                         }
    6627             : 
    6628           2 :                         WARN_ON(last_ptr->block_group != used_block_group);
    6629             : release_cluster:
    6630             :                         /* If we are on LOOP_NO_EMPTY_SIZE, we can't
    6631             :                          * set up a new clusters, so lets just skip it
    6632             :                          * and let the allocator find whatever block
    6633             :                          * it can find.  If we reach this point, we
    6634             :                          * will have tried the cluster allocator
    6635             :                          * plenty of times and not have found
    6636             :                          * anything, so we are likely way too
    6637             :                          * fragmented for the clustering stuff to find
    6638             :                          * anything.
    6639             :                          *
    6640             :                          * However, if the cluster is taken from the
    6641             :                          * current block group, release the cluster
    6642             :                          * first, so that we stand a better chance of
    6643             :                          * succeeding in the unclustered
    6644             :                          * allocation.  */
    6645          58 :                         if (loop >= LOOP_NO_EMPTY_SIZE &&
    6646          29 :                             used_block_group != block_group) {
    6647             :                                 spin_unlock(&last_ptr->refill_lock);
    6648             :                                 btrfs_release_block_group(used_block_group,
    6649             :                                                           delalloc);
    6650             :                                 goto unclustered_alloc;
    6651             :                         }
    6652             : 
    6653             :                         /*
    6654             :                          * this cluster didn't work out, free it and
    6655             :                          * start over
    6656             :                          */
    6657          29 :                         btrfs_return_cluster_to_free_space(NULL, last_ptr);
    6658             : 
    6659          29 :                         if (used_block_group != block_group)
    6660             :                                 btrfs_release_block_group(used_block_group,
    6661             :                                                           delalloc);
    6662             : refill_cluster:
    6663         394 :                         if (loop >= LOOP_NO_EMPTY_SIZE) {
    6664             :                                 spin_unlock(&last_ptr->refill_lock);
    6665             :                                 goto unclustered_alloc;
    6666             :                         }
    6667             : 
    6668         394 :                         aligned_cluster = max_t(unsigned long,
    6669             :                                                 empty_cluster + empty_size,
    6670             :                                               block_group->full_stripe_len);
    6671             : 
    6672             :                         /* allocate a cluster in this block group */
    6673         394 :                         ret = btrfs_find_space_cluster(root, block_group,
    6674             :                                                        last_ptr, search_start,
    6675             :                                                        num_bytes,
    6676             :                                                        aligned_cluster);
    6677         394 :                         if (ret == 0) {
    6678             :                                 /*
    6679             :                                  * now pull our allocation out of this
    6680             :                                  * cluster
    6681             :                                  */
    6682         217 :                                 offset = btrfs_alloc_from_cluster(block_group,
    6683             :                                                         last_ptr,
    6684             :                                                         num_bytes,
    6685             :                                                         search_start,
    6686             :                                                         &max_extent_size);
    6687         217 :                                 if (offset) {
    6688             :                                         /* we found one, proceed */
    6689             :                                         spin_unlock(&last_ptr->refill_lock);
    6690         217 :                                         trace_btrfs_reserve_extent_cluster(root,
    6691             :                                                 block_group, search_start,
    6692             :                                                 num_bytes);
    6693         217 :                                         goto checks;
    6694             :                                 }
    6695         177 :                         } else if (!cached && loop > LOOP_CACHING_NOWAIT
    6696          76 :                                    && !failed_cluster_refill) {
    6697             :                                 spin_unlock(&last_ptr->refill_lock);
    6698             : 
    6699             :                                 failed_cluster_refill = true;
    6700          76 :                                 wait_block_group_cache_progress(block_group,
    6701          76 :                                        num_bytes + empty_cluster + empty_size);
    6702          76 :                                 goto have_block_group;
    6703             :                         }
    6704             : 
    6705             :                         /*
    6706             :                          * at this point we either didn't find a cluster
    6707             :                          * or we weren't able to allocate a block from our
    6708             :                          * cluster.  Free the cluster we've been trying
    6709             :                          * to use, and go to the next block group
    6710             :                          */
    6711         101 :                         btrfs_return_cluster_to_free_space(NULL, last_ptr);
    6712             :                         spin_unlock(&last_ptr->refill_lock);
    6713             :                         goto loop;
    6714             :                 }
    6715             : 
    6716             : unclustered_alloc:
    6717      114135 :                 spin_lock(&block_group->free_space_ctl->tree_lock);
    6718      228021 :                 if (cached &&
    6719      113889 :                     block_group->free_space_ctl->free_space <
    6720      113889 :                     num_bytes + empty_cluster + empty_size) {
    6721       41516 :                         if (block_group->free_space_ctl->free_space >
    6722             :                             max_extent_size)
    6723        5019 :                                 max_extent_size =
    6724             :                                         block_group->free_space_ctl->free_space;
    6725             :                         spin_unlock(&block_group->free_space_ctl->tree_lock);
    6726             :                         goto loop;
    6727             :                 }
    6728       72616 :                 spin_unlock(&block_group->free_space_ctl->tree_lock);
    6729             : 
    6730       72622 :                 offset = btrfs_find_space_for_alloc(block_group, search_start,
    6731             :                                                     num_bytes, empty_size,
    6732             :                                                     &max_extent_size);
    6733             :                 /*
    6734             :                  * If we didn't find a chunk, and we haven't failed on this
    6735             :                  * block group before, and this block group is in the middle of
    6736             :                  * caching and we are ok with waiting, then go ahead and wait
    6737             :                  * for progress to be made, and set failed_alloc to true.
    6738             :                  *
    6739             :                  * If failed_alloc is true then we've already waited on this
    6740             :                  * block group once and should move on to the next block group.
    6741             :                  */
    6742       73678 :                 if (!offset && !failed_alloc && !cached &&
    6743        1053 :                     loop > LOOP_CACHING_NOWAIT) {
    6744          52 :                         wait_block_group_cache_progress(block_group,
    6745             :                                                 num_bytes + empty_size);
    6746             :                         failed_alloc = true;
    6747          52 :                         goto have_block_group;
    6748       72573 :                 } else if (!offset) {
    6749        1001 :                         if (!cached)
    6750             :                                 have_caching_bg = true;
    6751             :                         goto loop;
    6752             :                 }
    6753             : checks:
    6754             :                 search_start = stripe_align(root, block_group,
    6755             :                                             offset, num_bytes);
    6756             : 
    6757             :                 /* move on to the next group */
    6758      224530 :                 if (search_start + num_bytes >
    6759      112265 :                     block_group->key.objectid + block_group->key.offset) {
    6760             :                         btrfs_add_free_space(block_group, offset, num_bytes);
    6761             :                         goto loop;
    6762             :                 }
    6763             : 
    6764      112265 :                 if (offset < search_start)
    6765           0 :                         btrfs_add_free_space(block_group, offset,
    6766             :                                              search_start - offset);
    6767      112264 :                 BUG_ON(offset > search_start);
    6768             : 
    6769      112264 :                 ret = btrfs_update_reserved_bytes(block_group, num_bytes,
    6770             :                                                   alloc_type, delalloc);
    6771      112262 :                 if (ret == -EAGAIN) {
    6772             :                         btrfs_add_free_space(block_group, offset, num_bytes);
    6773             :                         goto loop;
    6774             :                 }
    6775             : 
    6776             :                 /* we are all good, lets return */
    6777      112262 :                 ins->objectid = search_start;
    6778      112262 :                 ins->offset = num_bytes;
    6779             : 
    6780      112262 :                 trace_btrfs_reserve_extent(orig_root, block_group,
    6781             :                                            search_start, num_bytes);
    6782             :                 btrfs_release_block_group(block_group, delalloc);
    6783             :                 break;
    6784             : loop:
    6785             :                 failed_cluster_refill = false;
    6786             :                 failed_alloc = false;
    6787       42986 :                 BUG_ON(index != get_block_group_index(block_group));
    6788             :                 btrfs_release_block_group(block_group, delalloc);
    6789             :         }
    6790      113460 :         up_read(&space_info->groups_sem);
    6791             : 
    6792      113460 :         if (!ins->objectid && loop >= LOOP_CACHING_WAIT && have_caching_bg)
    6793             :                 goto search;
    6794             : 
    6795      113459 :         if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES)
    6796             :                 goto search;
    6797             : 
    6798             :         /*
    6799             :          * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
    6800             :          *                      caching kthreads as we move along
    6801             :          * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
    6802             :          * LOOP_ALLOC_CHUNK, force a chunk allocation and try again
    6803             :          * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
    6804             :          *                      again
    6805             :          */
    6806      112434 :         if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE) {
    6807             :                 index = 0;
    6808         170 :                 loop++;
    6809         170 :                 if (loop == LOOP_ALLOC_CHUNK) {
    6810             :                         struct btrfs_trans_handle *trans;
    6811             :                         int exist = 0;
    6812             : 
    6813           1 :                         trans = current->journal_info;
    6814           1 :                         if (trans)
    6815             :                                 exist = 1;
    6816             :                         else
    6817           1 :                                 trans = btrfs_join_transaction(root);
    6818             : 
    6819           1 :                         if (IS_ERR(trans)) {
    6820           0 :                                 ret = PTR_ERR(trans);
    6821           0 :                                 goto out;
    6822             :                         }
    6823             : 
    6824           1 :                         ret = do_chunk_alloc(trans, root, flags,
    6825             :                                              CHUNK_ALLOC_FORCE);
    6826             :                         /*
    6827             :                          * Do not bail out on ENOSPC since we
    6828             :                          * can do more things.
    6829             :                          */
    6830           1 :                         if (ret < 0 && ret != -ENOSPC)
    6831           0 :                                 btrfs_abort_transaction(trans,
    6832             :                                                         root, ret);
    6833             :                         else
    6834             :                                 ret = 0;
    6835           2 :                         if (!exist)
    6836           1 :                                 btrfs_end_transaction(trans, root);
    6837           2 :                         if (ret)
    6838             :                                 goto out;
    6839             :                 }
    6840             : 
    6841         170 :                 if (loop == LOOP_NO_EMPTY_SIZE) {
    6842             :                         empty_size = 0;
    6843             :                         empty_cluster = 0;
    6844             :                 }
    6845             : 
    6846             :                 goto search;
    6847      112264 :         } else if (!ins->objectid) {
    6848             :                 ret = -ENOSPC;
    6849      112264 :         } else if (ins->objectid) {
    6850             :                 ret = 0;
    6851             :         }
    6852             : out:
    6853      112265 :         if (ret == -ENOSPC)
    6854           0 :                 ins->offset = max_extent_size;
    6855      112265 :         return ret;
    6856             : }
    6857             : 
    6858           0 : static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
    6859             :                             int dump_block_groups)
    6860             : {
    6861             :         struct btrfs_block_group_cache *cache;
    6862             :         int index = 0;
    6863             : 
    6864             :         spin_lock(&info->lock);
    6865           0 :         printk(KERN_INFO "BTRFS: space_info %llu has %llu free, is %sfull\n",
    6866             :                info->flags,
    6867           0 :                info->total_bytes - info->bytes_used - info->bytes_pinned -
    6868           0 :                info->bytes_reserved - info->bytes_readonly,
    6869           0 :                (info->full) ? "" : "not ");
    6870           0 :         printk(KERN_INFO "BTRFS: space_info total=%llu, used=%llu, pinned=%llu, "
    6871             :                "reserved=%llu, may_use=%llu, readonly=%llu\n",
    6872             :                info->total_bytes, info->bytes_used, info->bytes_pinned,
    6873             :                info->bytes_reserved, info->bytes_may_use,
    6874             :                info->bytes_readonly);
    6875             :         spin_unlock(&info->lock);
    6876             : 
    6877           0 :         if (!dump_block_groups)
    6878           0 :                 return;
    6879             : 
    6880           0 :         down_read(&info->groups_sem);
    6881             : again:
    6882           0 :         list_for_each_entry(cache, &info->block_groups[index], list) {
    6883             :                 spin_lock(&cache->lock);
    6884           0 :                 printk(KERN_INFO "BTRFS: "
    6885             :                            "block group %llu has %llu bytes, "
    6886             :                            "%llu used %llu pinned %llu reserved %s\n",
    6887             :                        cache->key.objectid, cache->key.offset,
    6888             :                        btrfs_block_group_used(&cache->item), cache->pinned,
    6889           0 :                        cache->reserved, cache->ro ? "[readonly]" : "");
    6890           0 :                 btrfs_dump_free_space(cache, bytes);
    6891             :                 spin_unlock(&cache->lock);
    6892             :         }
    6893           0 :         if (++index < BTRFS_NR_RAID_TYPES)
    6894             :                 goto again;
    6895           0 :         up_read(&info->groups_sem);
    6896             : }
    6897             : 
    6898      112259 : int btrfs_reserve_extent(struct btrfs_root *root,
    6899             :                          u64 num_bytes, u64 min_alloc_size,
    6900             :                          u64 empty_size, u64 hint_byte,
    6901             :                          struct btrfs_key *ins, int is_data, int delalloc)
    6902             : {
    6903             :         bool final_tried = false;
    6904             :         u64 flags;
    6905             :         int ret;
    6906             : 
    6907      112259 :         flags = btrfs_get_alloc_profile(root, is_data);
    6908             : again:
    6909      112262 :         WARN_ON(num_bytes < root->sectorsize);
    6910      112262 :         ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins,
    6911             :                                flags, delalloc);
    6912             : 
    6913      112264 :         if (ret == -ENOSPC) {
    6914           0 :                 if (!final_tried && ins->offset) {
    6915           0 :                         num_bytes = min(num_bytes >> 1, ins->offset);
    6916           0 :                         num_bytes = round_down(num_bytes, root->sectorsize);
    6917           0 :                         num_bytes = max(num_bytes, min_alloc_size);
    6918           0 :                         if (num_bytes == min_alloc_size)
    6919             :                                 final_tried = true;
    6920             :                         goto again;
    6921           0 :                 } else if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
    6922             :                         struct btrfs_space_info *sinfo;
    6923             : 
    6924             :                         sinfo = __find_space_info(root->fs_info, flags);
    6925           0 :                         btrfs_err(root->fs_info, "allocation failed flags %llu, wanted %llu",
    6926             :                                 flags, num_bytes);
    6927           0 :                         if (sinfo)
    6928           0 :                                 dump_space_info(sinfo, num_bytes, 1);
    6929             :                 }
    6930             :         }
    6931             : 
    6932      112264 :         return ret;
    6933             : }
    6934             : 
    6935         505 : static int __btrfs_free_reserved_extent(struct btrfs_root *root,
    6936             :                                         u64 start, u64 len,
    6937             :                                         int pin, int delalloc)
    6938             : {
    6939           0 :         struct btrfs_block_group_cache *cache;
    6940             :         int ret = 0;
    6941             : 
    6942         505 :         cache = btrfs_lookup_block_group(root->fs_info, start);
    6943         505 :         if (!cache) {
    6944           0 :                 btrfs_err(root->fs_info, "Unable to find block group for %llu",
    6945             :                         start);
    6946           0 :                 return -ENOSPC;
    6947             :         }
    6948             : 
    6949         505 :         if (btrfs_test_opt(root, DISCARD))
    6950           0 :                 ret = btrfs_discard_extent(root, start, len, NULL);
    6951             : 
    6952         505 :         if (pin)
    6953         505 :                 pin_down_extent(root, cache, start, len, 1);
    6954             :         else {
    6955             :                 btrfs_add_free_space(cache, start, len);
    6956           0 :                 btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc);
    6957             :         }
    6958         505 :         btrfs_put_block_group(cache);
    6959             : 
    6960         505 :         trace_btrfs_reserved_extent_free(root, start, len);
    6961             : 
    6962         505 :         return ret;
    6963             : }
    6964             : 
    6965           0 : int btrfs_free_reserved_extent(struct btrfs_root *root,
    6966             :                                u64 start, u64 len, int delalloc)
    6967             : {
    6968           0 :         return __btrfs_free_reserved_extent(root, start, len, 0, delalloc);
    6969             : }
    6970             : 
    6971         505 : int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root,
    6972             :                                        u64 start, u64 len)
    6973             : {
    6974         505 :         return __btrfs_free_reserved_extent(root, start, len, 1, 0);
    6975             : }
    6976             : 
    6977       52711 : static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
    6978             :                                       struct btrfs_root *root,
    6979             :                                       u64 parent, u64 root_objectid,
    6980             :                                       u64 flags, u64 owner, u64 offset,
    6981             :                                       struct btrfs_key *ins, int ref_mod)
    6982             : {
    6983             :         int ret;
    6984       52711 :         struct btrfs_fs_info *fs_info = root->fs_info;
    6985             :         struct btrfs_extent_item *extent_item;
    6986             :         struct btrfs_extent_inline_ref *iref;
    6987             :         struct btrfs_path *path;
    6988             :         struct extent_buffer *leaf;
    6989             :         int type;
    6990             :         u32 size;
    6991             : 
    6992       52711 :         if (parent > 0)
    6993             :                 type = BTRFS_SHARED_DATA_REF_KEY;
    6994             :         else
    6995             :                 type = BTRFS_EXTENT_DATA_REF_KEY;
    6996             : 
    6997       52711 :         size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);
    6998             : 
    6999       52711 :         path = btrfs_alloc_path();
    7000       52711 :         if (!path)
    7001             :                 return -ENOMEM;
    7002             : 
    7003       52711 :         path->leave_spinning = 1;
    7004       52711 :         ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
    7005             :                                       ins, size);
    7006       52711 :         if (ret) {
    7007           0 :                 btrfs_free_path(path);
    7008           0 :                 return ret;
    7009             :         }
    7010             : 
    7011       52711 :         leaf = path->nodes[0];
    7012      105422 :         extent_item = btrfs_item_ptr(leaf, path->slots[0],
    7013             :                                      struct btrfs_extent_item);
    7014       52711 :         btrfs_set_extent_refs(leaf, extent_item, ref_mod);
    7015       52711 :         btrfs_set_extent_generation(leaf, extent_item, trans->transid);
    7016       52711 :         btrfs_set_extent_flags(leaf, extent_item,
    7017             :                                flags | BTRFS_EXTENT_FLAG_DATA);
    7018             : 
    7019       52711 :         iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
    7020       52711 :         btrfs_set_extent_inline_ref_type(leaf, iref, type);
    7021       52711 :         if (parent > 0) {
    7022             :                 struct btrfs_shared_data_ref *ref;
    7023           0 :                 ref = (struct btrfs_shared_data_ref *)(iref + 1);
    7024             :                 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
    7025           0 :                 btrfs_set_shared_data_ref_count(leaf, ref, ref_mod);
    7026             :         } else {
    7027             :                 struct btrfs_extent_data_ref *ref;
    7028       52711 :                 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
    7029             :                 btrfs_set_extent_data_ref_root(leaf, ref, root_objectid);
    7030             :                 btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
    7031             :                 btrfs_set_extent_data_ref_offset(leaf, ref, offset);
    7032       52711 :                 btrfs_set_extent_data_ref_count(leaf, ref, ref_mod);
    7033             :         }
    7034             : 
    7035       52711 :         btrfs_mark_buffer_dirty(path->nodes[0]);
    7036       52711 :         btrfs_free_path(path);
    7037             : 
    7038             :         /* Always set parent to 0 here since its exclusive anyway. */
    7039       52710 :         ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
    7040             :                                       ins->objectid, ins->offset,
    7041             :                                       BTRFS_QGROUP_OPER_ADD_EXCL, 0);
    7042       52710 :         if (ret)
    7043             :                 return ret;
    7044             : 
    7045       52710 :         ret = update_block_group(root, ins->objectid, ins->offset, 1);
    7046       52711 :         if (ret) { /* -ENOENT, logic error */
    7047           0 :                 btrfs_err(fs_info, "update block group failed for %llu %llu",
    7048             :                         ins->objectid, ins->offset);
    7049           0 :                 BUG();
    7050             :         }
    7051       52711 :         trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset);
    7052       52711 :         return ret;
    7053             : }
    7054             : 
    7055       53373 : static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
    7056             :                                      struct btrfs_root *root,
    7057             :                                      u64 parent, u64 root_objectid,
    7058             :                                      u64 flags, struct btrfs_disk_key *key,
    7059             :                                      int level, struct btrfs_key *ins,
    7060             :                                      int no_quota)
    7061             : {
    7062             :         int ret;
    7063       53373 :         struct btrfs_fs_info *fs_info = root->fs_info;
    7064             :         struct btrfs_extent_item *extent_item;
    7065             :         struct btrfs_tree_block_info *block_info;
    7066             :         struct btrfs_extent_inline_ref *iref;
    7067             :         struct btrfs_path *path;
    7068             :         struct extent_buffer *leaf;
    7069             :         u32 size = sizeof(*extent_item) + sizeof(*iref);
    7070       53373 :         u64 num_bytes = ins->offset;
    7071       53373 :         bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
    7072             :                                                  SKINNY_METADATA);
    7073             : 
    7074       53373 :         if (!skinny_metadata)
    7075             :                 size += sizeof(*block_info);
    7076             : 
    7077       53373 :         path = btrfs_alloc_path();
    7078       53373 :         if (!path) {
    7079           0 :                 btrfs_free_and_pin_reserved_extent(root, ins->objectid,
    7080           0 :                                                    root->leafsize);
    7081           0 :                 return -ENOMEM;
    7082             :         }
    7083             : 
    7084       53373 :         path->leave_spinning = 1;
    7085       53373 :         ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
    7086             :                                       ins, size);
    7087       53374 :         if (ret) {
    7088           0 :                 btrfs_free_and_pin_reserved_extent(root, ins->objectid,
    7089           0 :                                                    root->leafsize);
    7090           0 :                 btrfs_free_path(path);
    7091           0 :                 return ret;
    7092             :         }
    7093             : 
    7094       53374 :         leaf = path->nodes[0];
    7095      106748 :         extent_item = btrfs_item_ptr(leaf, path->slots[0],
    7096             :                                      struct btrfs_extent_item);
    7097             :         btrfs_set_extent_refs(leaf, extent_item, 1);
    7098       53373 :         btrfs_set_extent_generation(leaf, extent_item, trans->transid);
    7099       53373 :         btrfs_set_extent_flags(leaf, extent_item,
    7100             :                                flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
    7101             : 
    7102       53373 :         if (skinny_metadata) {
    7103           0 :                 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
    7104           0 :                 num_bytes = root->leafsize;
    7105             :         } else {
    7106       53373 :                 block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
    7107             :                 btrfs_set_tree_block_key(leaf, block_info, key);
    7108       53373 :                 btrfs_set_tree_block_level(leaf, block_info, level);
    7109       53373 :                 iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
    7110             :         }
    7111             : 
    7112       53373 :         if (parent > 0) {
    7113         457 :                 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
    7114             :                 btrfs_set_extent_inline_ref_type(leaf, iref,
    7115             :                                                  BTRFS_SHARED_BLOCK_REF_KEY);
    7116             :                 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
    7117             :         } else {
    7118             :                 btrfs_set_extent_inline_ref_type(leaf, iref,
    7119             :                                                  BTRFS_TREE_BLOCK_REF_KEY);
    7120             :                 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
    7121             :         }
    7122             : 
    7123       53373 :         btrfs_mark_buffer_dirty(leaf);
    7124       53374 :         btrfs_free_path(path);
    7125             : 
    7126       53374 :         if (!no_quota) {
    7127       53374 :                 ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
    7128             :                                               ins->objectid, num_bytes,
    7129             :                                               BTRFS_QGROUP_OPER_ADD_EXCL, 0);
    7130       53374 :                 if (ret)
    7131             :                         return ret;
    7132             :         }
    7133             : 
    7134       53374 :         ret = update_block_group(root, ins->objectid, root->leafsize, 1);
    7135       53373 :         if (ret) { /* -ENOENT, logic error */
    7136           0 :                 btrfs_err(fs_info, "update block group failed for %llu %llu",
    7137             :                         ins->objectid, ins->offset);
    7138           0 :                 BUG();
    7139             :         }
    7140             : 
    7141       53373 :         trace_btrfs_reserved_extent_alloc(root, ins->objectid, root->leafsize);
    7142       53373 :         return ret;
    7143             : }
    7144             : 
    7145       53378 : int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
    7146             :                                      struct btrfs_root *root,
    7147             :                                      u64 root_objectid, u64 owner,
    7148             :                                      u64 offset, struct btrfs_key *ins)
    7149             : {
    7150             :         int ret;
    7151             : 
    7152       53378 :         BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID);
    7153             : 
    7154       53378 :         ret = btrfs_add_delayed_data_ref(root->fs_info, trans, ins->objectid,
    7155             :                                          ins->offset, 0,
    7156             :                                          root_objectid, owner, offset,
    7157             :                                          BTRFS_ADD_DELAYED_EXTENT, NULL, 0);
    7158       53380 :         return ret;
    7159             : }
    7160             : 
    7161             : /*
    7162             :  * this is used by the tree logging recovery code.  It records that
    7163             :  * an extent has been allocated and makes sure to clear the free
    7164             :  * space cache bits as well
    7165             :  */
    7166           0 : int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
    7167             :                                    struct btrfs_root *root,
    7168             :                                    u64 root_objectid, u64 owner, u64 offset,
    7169             :                                    struct btrfs_key *ins)
    7170             : {
    7171             :         int ret;
    7172             :         struct btrfs_block_group_cache *block_group;
    7173             : 
    7174             :         /*
    7175             :          * Mixed block groups will exclude before processing the log so we only
    7176             :          * need to do the exlude dance if this fs isn't mixed.
    7177             :          */
    7178           0 :         if (!btrfs_fs_incompat(root->fs_info, MIXED_GROUPS)) {
    7179           0 :                 ret = __exclude_logged_extent(root, ins->objectid, ins->offset);
    7180           0 :                 if (ret)
    7181             :                         return ret;
    7182             :         }
    7183             : 
    7184           0 :         block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
    7185           0 :         if (!block_group)
    7186             :                 return -EINVAL;
    7187             : 
    7188           0 :         ret = btrfs_update_reserved_bytes(block_group, ins->offset,
    7189             :                                           RESERVE_ALLOC_NO_ACCOUNT, 0);
    7190           0 :         BUG_ON(ret); /* logic error */
    7191           0 :         ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
    7192             :                                          0, owner, offset, ins, 1);
    7193           0 :         btrfs_put_block_group(block_group);
    7194           0 :         return ret;
    7195             : }
    7196             : 
    7197             : static struct extent_buffer *
    7198       58886 : btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
    7199             :                       u64 bytenr, u32 blocksize, int level)
    7200             : {
    7201       58885 :         struct extent_buffer *buf;
    7202             : 
    7203       58886 :         buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
    7204       58885 :         if (!buf)
    7205             :                 return ERR_PTR(-ENOMEM);
    7206       58885 :         btrfs_set_header_generation(buf, trans->transid);
    7207             :         btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
    7208       58885 :         btrfs_tree_lock(buf);
    7209       58886 :         clean_tree_block(trans, root, buf);
    7210             :         clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
    7211             : 
    7212             :         btrfs_set_lock_blocking(buf);
    7213       58885 :         btrfs_set_buffer_uptodate(buf);
    7214             : 
    7215       58885 :         if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
    7216             :                 /*
    7217             :                  * we allow two log transactions at a time, use different
    7218             :                  * EXENT bit to differentiate dirty pages.
    7219             :                  */
    7220        3108 :                 if (root->log_transid % 2 == 0)
    7221        1724 :                         set_extent_dirty(&root->dirty_log_pages, buf->start,
    7222        1724 :                                         buf->start + buf->len - 1, GFP_NOFS);
    7223             :                 else
    7224        1384 :                         set_extent_new(&root->dirty_log_pages, buf->start,
    7225        1384 :                                         buf->start + buf->len - 1, GFP_NOFS);
    7226             :         } else {
    7227       55777 :                 set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
    7228       55777 :                          buf->start + buf->len - 1, GFP_NOFS);
    7229             :         }
    7230       58886 :         trans->blocks_used++;
    7231             :         /* this returns a buffer locked for blocking */
    7232             :         return buf;
    7233             : }
    7234             : 
    7235             : static struct btrfs_block_rsv *
    7236       58884 : use_block_rsv(struct btrfs_trans_handle *trans,
    7237             :               struct btrfs_root *root, u32 blocksize)
    7238             : {
    7239             :         struct btrfs_block_rsv *block_rsv;
    7240       58884 :         struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
    7241             :         int ret;
    7242             :         bool global_updated = false;
    7243             : 
    7244       58884 :         block_rsv = get_block_rsv(trans, root);
    7245             : 
    7246       58884 :         if (unlikely(block_rsv->size == 0))
    7247             :                 goto try_reserve;
    7248             : again:
    7249       55395 :         ret = block_rsv_use_bytes(block_rsv, blocksize);
    7250       55397 :         if (!ret)
    7251             :                 return block_rsv;
    7252             : 
    7253          13 :         if (block_rsv->failfast)
    7254           0 :                 return ERR_PTR(ret);
    7255             : 
    7256          13 :         if (block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL && !global_updated) {
    7257             :                 global_updated = true;
    7258           0 :                 update_global_block_rsv(root->fs_info);
    7259           0 :                 goto again;
    7260             :         }
    7261             : 
    7262          13 :         if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
    7263             :                 static DEFINE_RATELIMIT_STATE(_rs,
    7264             :                                 DEFAULT_RATELIMIT_INTERVAL * 10,
    7265             :                                 /*DEFAULT_RATELIMIT_BURST*/ 1);
    7266           0 :                 if (__ratelimit(&_rs))
    7267           0 :                         WARN(1, KERN_DEBUG
    7268             :                                 "BTRFS: block rsv returned %d\n", ret);
    7269             :         }
    7270             : try_reserve:
    7271        3502 :         ret = reserve_metadata_bytes(root, block_rsv, blocksize,
    7272             :                                      BTRFS_RESERVE_NO_FLUSH);
    7273        3502 :         if (!ret)
    7274             :                 return block_rsv;
    7275             :         /*
    7276             :          * If we couldn't reserve metadata bytes try and use some from
    7277             :          * the global reserve if its space type is the same as the global
    7278             :          * reservation.
    7279             :          */
    7280           0 :         if (block_rsv->type != BTRFS_BLOCK_RSV_GLOBAL &&
    7281           0 :             block_rsv->space_info == global_rsv->space_info) {
    7282           0 :                 ret = block_rsv_use_bytes(global_rsv, blocksize);
    7283           0 :                 if (!ret)
    7284             :                         return global_rsv;
    7285             :         }
    7286           0 :         return ERR_PTR(ret);
    7287             : }
    7288             : 
    7289           0 : static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
    7290             :                             struct btrfs_block_rsv *block_rsv, u32 blocksize)
    7291             : {
    7292           0 :         block_rsv_add_bytes(block_rsv, blocksize, 0);
    7293           0 :         block_rsv_release_bytes(fs_info, block_rsv, NULL, 0);
    7294           0 : }
    7295             : 
    7296             : /*
    7297             :  * finds a free extent and does all the dirty work required for allocation
    7298             :  * returns the key for the extent through ins, and a tree buffer for
    7299             :  * the first block of the extent through buf.
    7300             :  *
    7301             :  * returns the tree buffer or NULL.
    7302             :  */
    7303       58884 : struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
    7304             :                                         struct btrfs_root *root, u32 blocksize,
    7305             :                                         u64 parent, u64 root_objectid,
    7306             :                                         struct btrfs_disk_key *key, int level,
    7307             :                                         u64 hint, u64 empty_size)
    7308             : { /*
                     :    * Allocate a new tree block: take a block reservation, reserve an
                     :    * extent of blocksize bytes, initialise an extent buffer on it and,
                     :    * unless root_objectid is BTRFS_TREE_LOG_OBJECTID, queue a delayed
                     :    * tree ref carrying an extent op for the new block.
                     :    *
                     :    * Returns the new buffer, or an error pointer when use_block_rsv()
                     :    * or btrfs_reserve_extent() fails.  Other failures hit BUG_ON().
                     :    */
    7309             :         struct btrfs_key ins;
    7310             :         struct btrfs_block_rsv *block_rsv;
    7311             :         struct extent_buffer *buf;
    7312             :         u64 flags = 0;
    7313             :         int ret;
    7314       58884 :         bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
    7315             :                                                  SKINNY_METADATA);
    7316             : 
    7317             : #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
    7318             :         if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state))) { /* dummy root: skip reservations, hand out blocks from root->alloc_bytenr */
    7319             :                 buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr,
    7320             :                                             blocksize, level);
    7321             :                 if (!IS_ERR(buf))
    7322             :                         root->alloc_bytenr += blocksize; /* advance only when the buffer was actually created */
    7323             :                 return buf;
    7324             :         }
    7325             : #endif
    7326       58884 :         block_rsv = use_block_rsv(trans, root, blocksize);
    7327       58886 :         if (IS_ERR(block_rsv))
    7328             :                 return ERR_CAST(block_rsv);
    7329             : 
    7330       58886 :         ret = btrfs_reserve_extent(root, blocksize, blocksize,
    7331             :                                    empty_size, hint, &ins, 0, 0);
    7332       58886 :         if (ret) {
    7333           0 :                 unuse_block_rsv(root->fs_info, block_rsv, blocksize); /* hand the reservation back before failing */
    7334           0 :                 return ERR_PTR(ret);
    7335             :         }
    7336             : 
    7337       58886 :         buf = btrfs_init_new_buffer(trans, root, ins.objectid,
    7338             :                                     blocksize, level);
    7339       58886 :         BUG_ON(IS_ERR(buf)); /* -ENOMEM */ /* NOTE(review): crashes instead of releasing the extent and reservation taken above - confirm this is intended */
    7340             : 
    7341       58886 :         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) { /* reloc tree blocks always get the FULL_BACKREF flag */
    7342         457 :                 if (parent == 0)
    7343         441 :                         parent = ins.objectid; /* no parent supplied: the new block's own start is used as parent */
    7344             :                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
    7345             :         } else
    7346       58429 :                 BUG_ON(parent > 0); /* callers for any other root must pass parent == 0 */
    7347             : 
    7348       58886 :         if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { /* no delayed ref is queued for BTRFS_TREE_LOG_OBJECTID */
    7349             :                 struct btrfs_delayed_extent_op *extent_op;
    7350             :                 extent_op = btrfs_alloc_delayed_extent_op();
    7351       55778 :                 BUG_ON(!extent_op); /* -ENOMEM */
    7352       55778 :                 if (key)
    7353       55624 :                         memcpy(&extent_op->key, key, sizeof(extent_op->key));
    7354             :                 else
    7355         154 :                         memset(&extent_op->key, 0, sizeof(extent_op->key)); /* no key given: record an all-zero key */
    7356       55778 :                 extent_op->flags_to_set = flags;
    7357       55778 :                 if (skinny_metadata) /* update_key is requested only when SKINNY_METADATA is not enabled */
    7358           0 :                         extent_op->update_key = 0;
    7359             :                 else
    7360       55778 :                         extent_op->update_key = 1;
    7361       55778 :                 extent_op->update_flags = 1;
    7362       55778 :                 extent_op->is_data = 0;
    7363       55778 :                 extent_op->level = level;
    7364             : 
    7365       55778 :                 ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
    7366             :                                         ins.objectid,
    7367             :                                         ins.offset, parent, root_objectid,
    7368             :                                         level, BTRFS_ADD_DELAYED_EXTENT,
    7369             :                                         extent_op, 0);
    7370       55778 :                 BUG_ON(ret); /* -ENOMEM */
    7371             :         }
    7372       58886 :         return buf;
    7373             : }
    7374             : 
    7375             : struct walk_control { /* state shared by the walk_down / reada helpers while walking a tree */
    7376             :         u64 refs[BTRFS_MAX_LEVEL]; /* per-level reference count, filled by btrfs_lookup_extent_info() */
    7377             :         u64 flags[BTRFS_MAX_LEVEL]; /* per-level extent flags, filled alongside refs[] */
    7378             :         struct btrfs_key update_progress; /* node keys are compared against this with btrfs_comp_cpu_keys() */
    7379             :         int stage; /* DROP_REFERENCE or UPDATE_BACKREF */
    7380             :         int level; /* level of the block currently being processed */
    7381             :         int shared_level; /* set to the child level when the stage switches to UPDATE_BACKREF */
    7382             :         int update_ref; /* when zero, shared blocks are skipped rather than having back refs updated */
    7383             :         int keep_locks; /* when set, walk_down_proc() leaves the block locked in DROP_REFERENCE */
    7384             :         int reada_slot; /* slot at which the previous reada_walk_down() pass stopped */
    7385             :         int reada_count; /* maximum number of blocks one readahead pass may request */
    7386             :         int for_reloc; /* not used in the code visible here - TODO confirm meaning against callers */
    7387             : };
    7388             : 
    7389             : #define DROP_REFERENCE  1 /* walk_control.stage value: references to blocks are being dropped */
    7390             : #define UPDATE_BACKREF  2 /* walk_control.stage value: back refs of a shared subtree are being updated */
    7391             : 
    7392           0 : static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
    7393           0 :                                      struct btrfs_root *root,
    7394             :                                      struct walk_control *wc,
    7395             :                                      struct btrfs_path *path)
    7396             : { /*
                     :    * Issue readahead for children of the node at wc->level, starting
                     :    * at the path's current slot and stopping after wc->reada_count
                     :    * requests, at the end of the node, or when readahead_tree_block()
                     :    * returns non-zero.  Pointers are filtered by refs/flags/generation
                     :    * checks before being read ahead.  Lookup errors are ignored and the
                     :    * slot where the pass stopped is saved in wc->reada_slot.
                     :    */
    7397             :         u64 bytenr;
    7398             :         u64 generation;
    7399             :         u64 refs;
    7400             :         u64 flags;
    7401             :         u32 nritems;
    7402             :         u32 blocksize;
    7403             :         struct btrfs_key key;
    7404           0 :         struct extent_buffer *eb;
    7405             :         int ret;
    7406             :         int slot;
    7407             :         int nread = 0;
    7408             : 
    7409           0 :         if (path->slots[wc->level] < wc->reada_slot) { /* still inside the previously requested range: shrink the window to 2/3, floor of 2 */
    7410           0 :                 wc->reada_count = wc->reada_count * 2 / 3;
    7411           0 :                 wc->reada_count = max(wc->reada_count, 2);
    7412             :         } else { /* caught up with or passed the last pass: grow the window by 3/2, capped at BTRFS_NODEPTRS_PER_BLOCK(root) */
    7413           0 :                 wc->reada_count = wc->reada_count * 3 / 2;
    7414           0 :                 wc->reada_count = min_t(int, wc->reada_count,
    7415             :                                         BTRFS_NODEPTRS_PER_BLOCK(root));
    7416             :         }
    7417             : 
    7418           0 :         eb = path->nodes[wc->level];
    7419             :         nritems = btrfs_header_nritems(eb);
    7420             :         blocksize = btrfs_level_size(root, wc->level - 1);
    7421             : 
    7422           0 :         for (slot = path->slots[wc->level]; slot < nritems; slot++) {
    7423           0 :                 if (nread >= wc->reada_count)
    7424             :                         break;
    7425             : 
    7426           0 :                 cond_resched();
    7427             :                 bytenr = btrfs_node_blockptr(eb, slot);
    7428             :                 generation = btrfs_node_ptr_generation(eb, slot);
    7429             : 
    7430           0 :                 if (slot == path->slots[wc->level]) /* the current slot is always read ahead, without any filtering */
    7431             :                         goto reada;
    7432             : 
    7433           0 :                 if (wc->stage == UPDATE_BACKREF &&
    7434           0 :                     generation <= root->root_key.offset)
    7435           0 :                         continue;
    7436             : 
    7437             :                 /* We don't lock the tree block, it's OK to be racy here */
    7438           0 :                 ret = btrfs_lookup_extent_info(trans, root, bytenr,
    7439           0 :                                                wc->level - 1, 1, &refs,
    7440             :                                                &flags);
    7441             :                 /* We don't care about errors in readahead. */
    7442           0 :                 if (ret < 0)
    7443           0 :                         continue;
    7444           0 :                 BUG_ON(refs == 0); /* NOTE(review): the lookup above is described as racy; confirm a zero count can never be observed here */
    7445             : 
    7446           0 :                 if (wc->stage == DROP_REFERENCE) {
    7447           0 :                         if (refs == 1) /* sole reference: read it ahead */
    7448             :                                 goto reada;
    7449             : 
    7450           0 :                         if (wc->level == 1 &&
    7451           0 :                             (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
    7452           0 :                                 continue;
    7453           0 :                         if (!wc->update_ref ||
    7454           0 :                             generation <= root->root_key.offset)
    7455           0 :                                 continue;
    7456             :                         btrfs_node_key_to_cpu(eb, &key, slot);
    7457           0 :                         ret = btrfs_comp_cpu_keys(&key,
    7458             :                                                   &wc->update_progress);
    7459           0 :                         if (ret < 0) /* key sorts before update_progress: skip */
    7460           0 :                                 continue;
    7461             :                 } else {
    7462           0 :                         if (wc->level == 1 &&
    7463           0 :                             (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
    7464           0 :                                 continue;
    7465             :                 }
    7466             : reada:
    7467           0 :                 ret = readahead_tree_block(root, bytenr, blocksize,
    7468             :                                            generation);
    7469           0 :                 if (ret) /* stop the whole pass on the first readahead failure */
    7470             :                         break;
    7471           0 :                 nread++;
    7472             :         }
    7473           0 :         wc->reada_slot = slot; /* remember where this pass stopped for the next window adjustment */
    7474           0 : }
    7475             : 
    7476         464 : static int account_leaf_items(struct btrfs_trans_handle *trans,
    7477             :                               struct btrfs_root *root,
    7478         464 :                               struct extent_buffer *eb)
    7479             : { /*
                     :    * Scan every item of leaf eb and, for each BTRFS_EXTENT_DATA_KEY item
                     :    * that is not inline and has a non-zero disk bytenr, record a
                     :    * BTRFS_QGROUP_OPER_SUB_SUBTREE operation for that extent.
                     :    *
                     :    * Returns 0, or the first non-zero value returned by
                     :    * btrfs_qgroup_record_ref().
                     :    */
    7480         464 :         int nr = btrfs_header_nritems(eb);
    7481             :         int i, extent_type, ret;
    7482             :         struct btrfs_key key;
    7483             :         struct btrfs_file_extent_item *fi;
    7484             :         u64 bytenr, num_bytes;
    7485             : 
    7486        4143 :         for (i = 0; i < nr; i++) {
    7487        4143 :                 btrfs_item_key_to_cpu(eb, &key, i);
    7488             : 
    7489        4143 :                 if (key.type != BTRFS_EXTENT_DATA_KEY) /* only file extent items are of interest */
    7490        1982 :                         continue;
    7491             : 
    7492        2161 :                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
    7493             :                 /* filter out non qgroup-accountable extents  */
    7494             :                 extent_type = btrfs_file_extent_type(eb, fi);
    7495             : 
    7496        2161 :                 if (extent_type == BTRFS_FILE_EXTENT_INLINE)
    7497           0 :                         continue;
    7498             : 
    7499             :                 bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
    7500        2161 :                 if (!bytenr) /* presumably a hole with no extent on disk - confirm */
    7501         104 :                         continue;
    7502             : 
    7503             :                 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
    7504             : 
    7505        2057 :                 ret = btrfs_qgroup_record_ref(trans, root->fs_info,
    7506             :                                               root->objectid,
    7507             :                                               bytenr, num_bytes,
    7508             :                                               BTRFS_QGROUP_OPER_SUB_SUBTREE, 0);
    7509        2057 :                 if (ret) /* abort the scan on the first failure */
    7510             :                         return ret;
    7511             :         }
    7512             :         return 0;
    7513             : }
    7514             : 
    7515             : /*
    7516             :  * Walk up the tree from the bottom, freeing leaves and any interior
    7517             :  * nodes which have had all slots visited. If a node (leaf or
    7518             :  * interior) is freed, the node above it will have its slot
    7519             :  * incremented. The root node will never be freed.
    7520             :  *
    7521             :  * At the end of this function, we should have a path which has all
    7522             :  * slots incremented to the next position for a search. If we need to
    7523             :  * read a new node it will be NULL and the node above it will have the
    7524             :  * correct slot selected for a later read.
    7525             :  *
    7526             :  * If we increment the root node's slot counter past the number of
    7527             :  * elements, 1 is returned to signal completion of the search.
    7528             :  */
    7529           0 : static int adjust_slots_upwards(struct btrfs_root *root,
    7530             :                                 struct btrfs_path *path, int root_level)
    7531             : { /*
                     :    * Advance the path after a leaf has been processed.  Starting at
                     :    * level 0, each level's slot is incremented; the leaf and any
                     :    * exhausted interior node below root_level are unlocked, released
                     :    * and cleared from the path.  The climb stops at the first level
                     :    * that still has a valid slot.
                     :    *
                     :    * Returns 1 when root_level is 0 or the root's slot has run past its
                     :    * item count, otherwise 0.  The root parameter is not referenced in
                     :    * this body.
                     :    */
    7532             :         int level = 0;
    7533             :         int nr, slot;
    7534           0 :         struct extent_buffer *eb;
    7535             : 
    7536           0 :         if (root_level == 0) /* the subtree is a single leaf: nothing further to walk */
    7537             :                 return 1;
    7538             : 
    7539           0 :         while (level <= root_level) {
    7540           0 :                 eb = path->nodes[level];
    7541           0 :                 nr = btrfs_header_nritems(eb);
    7542           0 :                 path->slots[level]++;
    7543             :                 slot = path->slots[level];
    7544           0 :                 if (slot >= nr || level == 0) { /* level 0 is always dropped; higher levels only once all slots are used */
    7545             :                         /*
    7546             :                          * Don't free the root -  we will detect this
    7547             :                          * condition after our loop and return a
    7548             :                          * positive value for caller to stop walking the tree.
    7549             :                          */
    7550           0 :                         if (level != root_level) {
    7551           0 :                                 btrfs_tree_unlock_rw(eb, path->locks[level]);
    7552           0 :                                 path->locks[level] = 0;
    7553             : 
    7554           0 :                                 free_extent_buffer(eb);
    7555           0 :                                 path->nodes[level] = NULL; /* a NULL node tells the caller to read the next child here */
    7556           0 :                                 path->slots[level] = 0;
    7557             :                         }
    7558             :                 } else {
    7559             :                         /*
    7560             :                          * We have a valid slot to walk back down
    7561             :                          * from. Stop here so caller can process these
    7562             :                          * new nodes.
    7563             :                          */
    7564             :                         break;
    7565             :                 }
    7566             : 
    7567           0 :                 level++;
    7568             :         }
    7569             : 
    7570           0 :         eb = path->nodes[root_level];
    7571           0 :         if (path->slots[root_level] >= btrfs_header_nritems(eb)) /* root exhausted: the whole subtree has been visited */
    7572             :                 return 1;
    7573             : 
    7574             :         return 0;
    7575             : }
    7576             : 
    7577             : /*
    7578             :  * root_eb is the subtree root and is locked before this function is called.
    7579             :  */
    7580           2 : static int account_shared_subtree(struct btrfs_trans_handle *trans,
    7581             :                                   struct btrfs_root *root,
    7582             :                                   struct extent_buffer *root_eb,
    7583             :                                   u64 root_gen,
    7584             :                                   int root_level)
    7585             : { /*
                     :    * Record BTRFS_QGROUP_OPER_SUB_SUBTREE operations for the subtree
                     :    * rooted at root_eb: one per tree block read below the root and one
                     :    * per accountable file extent found in its leaves.  Does nothing
                     :    * when quotas are disabled.
                     :    *
                     :    * Returns 0 on success, -ENOMEM if the path cannot be allocated,
                     :    * -EIO if a child block cannot be read, or the error returned by
                     :    * btrfs_read_buffer() / btrfs_qgroup_record_ref() /
                     :    * account_leaf_items().
                     :    */
    7586             :         int ret = 0;
    7587             :         int level;
    7588             :         struct extent_buffer *eb = root_eb;
    7589             :         struct btrfs_path *path = NULL;
    7590             : 
    7591           2 :         BUG_ON(root_level < 0 || root_level > BTRFS_MAX_LEVEL); /* NOTE(review): root_level indexes path->nodes[]/slots[]/locks[] below; if those hold BTRFS_MAX_LEVEL entries this bound should be >= - confirm */
    7592           2 :         BUG_ON(root_eb == NULL);
    7593             : 
    7594           2 :         if (!root->fs_info->quota_enabled) /* nothing to account without quotas */
    7595             :                 return 0;
    7596             : 
    7597           0 :         if (!extent_buffer_uptodate(root_eb)) {
    7598           0 :                 ret = btrfs_read_buffer(root_eb, root_gen);
    7599           0 :                 if (ret)
    7600             :                         goto out;
    7601             :         }
    7602             : 
    7603           0 :         if (root_level == 0) { /* the subtree is a single leaf: account it directly, no path needed */
    7604           0 :                 ret = account_leaf_items(trans, root, root_eb);
    7605           0 :                 goto out;
    7606             :         }
    7607             : 
    7608           0 :         path = btrfs_alloc_path();
    7609           0 :         if (!path)
    7610             :                 return -ENOMEM;
    7611             : 
    7612             :         /*
    7613             :          * Walk down the tree.  Missing extent blocks are filled in as
    7614             :          * we go. Metadata is accounted every time we read a new
    7615             :          * extent block.
    7616             :          *
    7617             :          * When we reach a leaf, we account for file extent items in it,
    7618             :          * walk back up the tree (adjusting slot pointers as we go)
    7619             :          * and restart the search process.
    7620             :          */
    7621             :         extent_buffer_get(root_eb); /* For path */
    7622           0 :         path->nodes[root_level] = root_eb;
    7623           0 :         path->slots[root_level] = 0;
    7624           0 :         path->locks[root_level] = 0; /* so release_path doesn't try to unlock */
    7625             : walk_down:
    7626             :         level = root_level;
    7627           0 :         while (level >= 0) {
    7628           0 :                 if (path->nodes[level] == NULL) { /* this level has not been read yet, or was released by adjust_slots_upwards() */
    7629           0 :                         int child_bsize = root->nodesize;
    7630             :                         int parent_slot;
    7631             :                         u64 child_gen;
    7632             :                         u64 child_bytenr;
    7633             : 
    7634             :                         /* We need to get child blockptr/gen from
    7635             :                          * parent before we can read it. */
    7636           0 :                         eb = path->nodes[level + 1];
    7637           0 :                         parent_slot = path->slots[level + 1];
    7638             :                         child_bytenr = btrfs_node_blockptr(eb, parent_slot);
    7639             :                         child_gen = btrfs_node_ptr_generation(eb, parent_slot);
    7640             : 
    7641           0 :                         eb = read_tree_block(root, child_bytenr, child_bsize,
    7642             :                                              child_gen);
    7643           0 :                         if (!eb || !extent_buffer_uptodate(eb)) { /* NOTE(review): a non-NULL but not-uptodate eb is neither stored in path nor freed before the goto - looks like a leaked reference, confirm */
    7644             :                                 ret = -EIO;
    7645             :                                 goto out;
    7646             :                         }
    7647             : 
    7648           0 :                         path->nodes[level] = eb;
    7649           0 :                         path->slots[level] = 0;
    7650             : 
    7651           0 :                         btrfs_tree_read_lock(eb);
    7652           0 :                         btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
    7653           0 :                         path->locks[level] = BTRFS_READ_LOCK_BLOCKING; /* recorded so the lock is dropped with the right mode later */
    7654             : 
    7655           0 :                         ret = btrfs_qgroup_record_ref(trans, root->fs_info,
    7656             :                                                 root->objectid,
    7657             :                                                 child_bytenr,
    7658             :                                                 child_bsize,
    7659             :                                                 BTRFS_QGROUP_OPER_SUB_SUBTREE,
    7660             :                                                 0);
    7661           0 :                         if (ret)
    7662             :                                 goto out;
    7663             : 
    7664             :                 }
    7665             : 
    7666           0 :                 if (level == 0) {
    7667           0 :                         ret = account_leaf_items(trans, root, path->nodes[level]);
    7668           0 :                         if (ret)
    7669             :                                 goto out;
    7670             : 
    7671             :                         /* Nonzero return here means we completed our search */
    7672           0 :                         ret = adjust_slots_upwards(root, path, root_level);
    7673           0 :                         if (ret)
    7674             :                                 break;
    7675             : 
    7676             :                         /* Restart search with new slots */
    7677             :                         goto walk_down;
    7678             :                 }
    7679             : 
    7680           0 :                 level--;
    7681             :         }
    7682             : 
    7683             :         ret = 0; /* the break above leaves the positive "search complete" value in ret; report success instead */
    7684             : out:
    7685           0 :         btrfs_free_path(path); /* path is still NULL when reached from the early gotos; presumably btrfs_free_path() tolerates NULL - confirm */
    7686             : 
    7687           0 :         return ret;
    7688             : }
    7689             : 
    7690             : /*
    7691             :  * helper to process tree block while walking down the tree.
    7692             :  *
    7693             :  * when wc->stage == UPDATE_BACKREF, this function updates
    7694             :  * back refs for pointers in the block.
    7695             :  *
    7696             :  * NOTE: return value 1 means we should stop walking down.
    7697             :  */
    7698         482 : static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
    7699             :                                    struct btrfs_root *root,
    7700             :                                    struct btrfs_path *path,
    7701             :                                    struct walk_control *wc, int lookup_info)
    7702             : { /*
                     :    * Process the block at path->nodes[wc->level].
                     :    *
                     :    * lookup_info non-zero asks for wc->refs[]/wc->flags[] of this level
                     :    * to be refreshed from the extent tree when the cached values could
                     :    * still change.
                     :    *
                     :    * Returns 1 to stop descending, 0 to continue, or the error from
                     :    * btrfs_lookup_extent_info().
                     :    */
    7703         482 :         int level = wc->level;
    7704         482 :         struct extent_buffer *eb = path->nodes[level];
    7705             :         u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
    7706             :         int ret;
    7707             : 
    7708         482 :         if (wc->stage == UPDATE_BACKREF &&
    7709           0 :             btrfs_header_owner(eb) != root->root_key.objectid) /* block owned by another root: do not descend */
    7710             :                 return 1;
    7711             : 
    7712             :         /*
    7713             :          * when reference count of tree block is 1, it won't increase
    7714             :          * again. once full backref flag is set, we never clear it.
    7715             :          */
    7716         482 :         if (lookup_info &&
    7717         466 :             ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
    7718           0 :              (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
    7719         450 :                 BUG_ON(!path->locks[level]); /* the block must be locked while its refs/flags are looked up */
    7720         450 :                 ret = btrfs_lookup_extent_info(trans, root,
    7721             :                                                eb->start, level, 1,
    7722             :                                                &wc->refs[level],
    7723             :                                                &wc->flags[level]);
    7724         450 :                 BUG_ON(ret == -ENOMEM);
    7725         450 :                 if (ret)
    7726             :                         return ret;
    7727         450 :                 BUG_ON(wc->refs[level] == 0);
    7728             :         }
    7729             : 
    7730         482 :         if (wc->stage == DROP_REFERENCE) {
    7731         482 :                 if (wc->refs[level] > 1) /* shared block: stop walking down */
    7732             :                         return 1;
    7733             : 
    7734         482 :                 if (path->locks[level] && !wc->keep_locks) { /* sole reference: release the lock unless asked to keep it */
    7735         466 :                         btrfs_tree_unlock_rw(eb, path->locks[level]);
    7736         466 :                         path->locks[level] = 0;
    7737             :                 }
    7738             :                 return 0;
    7739             :         }
    7740             : 
    7741             :         /* wc->stage == UPDATE_BACKREF */
    7742           0 :         if (!(wc->flags[level] & flag)) { /* FULL_BACKREF not set yet: add refs, drop refs, then set the flag on disk */
    7743           0 :                 BUG_ON(!path->locks[level]);
    7744             :                 ret = btrfs_inc_ref(trans, root, eb, 1); /* last argument presumably selects full back refs - confirm */
    7745           0 :                 BUG_ON(ret); /* -ENOMEM */
    7746             :                 ret = btrfs_dec_ref(trans, root, eb, 0);
    7747           0 :                 BUG_ON(ret); /* -ENOMEM */
    7748           0 :                 ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
    7749           0 :                                                   eb->len, flag,
    7750             :                                                   btrfs_header_level(eb), 0);
    7751           0 :                 BUG_ON(ret); /* -ENOMEM */
    7752           0 :                 wc->flags[level] |= flag; /* keep the cached flags in step with what was just set */
    7753             :         }
    7754             : 
    7755             :         /*
    7756             :          * the block is shared by multiple trees, so it's not good to
    7757             :          * keep the tree lock
    7758             :          */
    7759           0 :         if (path->locks[level] && level > 0) { /* level 0 blocks stay locked */
    7760           0 :                 btrfs_tree_unlock_rw(eb, path->locks[level]);
    7761           0 :                 path->locks[level] = 0;
    7762             :         }
    7763             :         return 0;
    7764             : }
    7765             : 
    7766             : /*
    7767             :  * helper to process tree block pointer.
    7768             :  *
    7769             :  * when wc->stage == DROP_REFERENCE, this function checks
    7770             :  * reference count of the block pointed to. if the block
    7771             :  * is shared and we need update back refs for the subtree
    7772             :  * rooted at the block, this function changes wc->stage to
    7773             :  * UPDATE_BACKREF. if the block is shared and there is no
    7774             :  * need to update back refs, this function drops the reference
    7775             :  * to the block.
    7776             :  *
    7777             :  * NOTE: return value 1 means we should stop walking down.
    7778             :  */
    7779          18 : static noinline int do_walk_down(struct btrfs_trans_handle *trans,
    7780          18 :                                  struct btrfs_root *root,
    7781             :                                  struct btrfs_path *path,
    7782             :                                  struct walk_control *wc, int *lookup_info)
    7783             : {
    7784             :         u64 bytenr;
    7785             :         u64 generation;
    7786             :         u64 parent;
    7787             :         u32 blocksize;
    7788             :         struct btrfs_key key;
    7789          16 :         struct extent_buffer *next;
    7790          18 :         int level = wc->level;
    7791             :         int reada = 0;
    7792             :         int ret = 0;
    7793             :         bool need_account = false;
    7794             : 
    7795          18 :         generation = btrfs_node_ptr_generation(path->nodes[level],
    7796             :                                                path->slots[level]);
    7797             :         /*
    7798             :          * if the lower level block was created before the snapshot
    7799             :          * was created, we know there is no need to update back refs
    7800             :          * for the subtree
    7801             :          */
    7802          18 :         if (wc->stage == UPDATE_BACKREF &&
    7803           0 :             generation <= root->root_key.offset) {
    7804           0 :                 *lookup_info = 1;
    7805           0 :                 return 1;
    7806             :         }
    7807             : 
    7808          18 :         bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
    7809             :         blocksize = btrfs_level_size(root, level - 1);
    7810             : 
    7811          18 :         next = btrfs_find_tree_block(root, bytenr, blocksize);
    7812          18 :         if (!next) {
    7813          16 :                 next = btrfs_find_create_tree_block(root, bytenr, blocksize);
    7814          16 :                 if (!next)
    7815             :                         return -ENOMEM;
    7816             :                 btrfs_set_buffer_lockdep_class(root->root_key.objectid, next,
    7817             :                                                level - 1);
    7818             :                 reada = 1;
    7819             :         }
    7820          18 :         btrfs_tree_lock(next);
    7821             :         btrfs_set_lock_blocking(next);
    7822             : 
    7823          18 :         ret = btrfs_lookup_extent_info(trans, root, bytenr, level - 1, 1,
    7824             :                                        &wc->refs[level - 1],
    7825          18 :                                        &wc->flags[level - 1]);
    7826          18 :         if (ret < 0) {
    7827           0 :                 btrfs_tree_unlock(next);
    7828           0 :                 return ret;
    7829             :         }
    7830             : 
    7831          18 :         if (unlikely(wc->refs[level - 1] == 0)) {
    7832           0 :                 btrfs_err(root->fs_info, "Missing references.");
    7833           0 :                 BUG();
    7834             :         }
    7835          18 :         *lookup_info = 0;
    7836             : 
    7837          18 :         if (wc->stage == DROP_REFERENCE) {
    7838          18 :                 if (wc->refs[level - 1] > 1) {
    7839             :                         need_account = true;
    7840           4 :                         if (level == 1 &&
    7841           2 :                             (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
    7842             :                                 goto skip;
    7843             : 
    7844           0 :                         if (!wc->update_ref ||
    7845           0 :                             generation <= root->root_key.offset)
    7846             :                                 goto skip;
    7847             : 
    7848           0 :                         btrfs_node_key_to_cpu(path->nodes[level], &key,
    7849             :                                               path->slots[level]);
    7850           0 :                         ret = btrfs_comp_cpu_keys(&key, &wc->update_progress);
    7851           0 :                         if (ret < 0)
    7852             :                                 goto skip;
    7853             : 
    7854           0 :                         wc->stage = UPDATE_BACKREF;
    7855           0 :                         wc->shared_level = level - 1;
    7856             :                 }
    7857             :         } else {
    7858           0 :                 if (level == 1 &&
    7859           0 :                     (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
    7860             :                         goto skip;
    7861             :         }
    7862             : 
    7863          16 :         if (!btrfs_buffer_uptodate(next, generation, 0)) {
    7864           0 :                 btrfs_tree_unlock(next);
    7865           0 :                 free_extent_buffer(next);
    7866             :                 next = NULL;
    7867           0 :                 *lookup_info = 1;
    7868             :         }
    7869             : 
    7870          16 :         if (!next) {
    7871           0 :                 if (reada && level == 1)
    7872           0 :                         reada_walk_down(trans, root, wc, path);
    7873           0 :                 next = read_tree_block(root, bytenr, blocksize, generation);
    7874           0 :                 if (!next || !extent_buffer_uptodate(next)) {
    7875           0 :                         free_extent_buffer(next);
    7876           0 :                         return -EIO;
    7877             :                 }
    7878           0 :                 btrfs_tree_lock(next);
    7879             :                 btrfs_set_lock_blocking(next);
    7880             :         }
    7881             : 
    7882             :         level--;
    7883          16 :         BUG_ON(level != btrfs_header_level(next));
    7884          16 :         path->nodes[level] = next;
    7885          16 :         path->slots[level] = 0;
    7886          16 :         path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
    7887          16 :         wc->level = level;
    7888          16 :         if (wc->level == 1)
    7889           0 :                 wc->reada_slot = 0;
    7890             :         return 0;
    7891             : skip:
    7892           2 :         wc->refs[level - 1] = 0;
    7893           2 :         wc->flags[level - 1] = 0;
    7894           2 :         if (wc->stage == DROP_REFERENCE) {
    7895           2 :                 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
    7896           2 :                         parent = path->nodes[level]->start;
    7897             :                 } else {
    7898           0 :                         BUG_ON(root->root_key.objectid !=
    7899             :                                btrfs_header_owner(path->nodes[level]));
    7900             :                         parent = 0;
    7901             :                 }
    7902             : 
    7903           2 :                 if (need_account) {
    7904           2 :                         ret = account_shared_subtree(trans, root, next,
    7905             :                                                      generation, level - 1);
    7906           2 :                         if (ret) {
    7907           0 :                                 printk_ratelimited(KERN_ERR "BTRFS: %s Error "
    7908             :                                         "%d accounting shared subtree. Quota "
    7909             :                                         "is out of sync, rescan required.\n",
    7910             :                                         root->fs_info->sb->s_id, ret);
    7911             :                         }
    7912             :                 }
    7913           2 :                 ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
    7914             :                                 root->root_key.objectid, level - 1, 0, 0);
    7915           2 :                 BUG_ON(ret); /* -ENOMEM */
    7916             :         }
    7917           2 :         btrfs_tree_unlock(next);
    7918           2 :         free_extent_buffer(next);
    7919           2 :         *lookup_info = 1;
    7920           2 :         return 1;
    7921             : }
    7922             : 
    7923             : /*
    7924             :  * Helper to process a tree block while walking up the tree.
    7925             :  *
    7926             :  * When wc->stage == DROP_REFERENCE, this function drops the
    7927             :  * reference count on the block at path->nodes[wc->level].
    7928             :  *
    7929             :  * When wc->stage == UPDATE_BACKREF, this function changes
    7930             :  * wc->stage back to DROP_REFERENCE once the walk is back at
    7931             :  * wc->shared_level; below that level it only clears the cached
                     :  * refs/flags for the block and returns 0.
    7932             :  *
    7933             :  * Returns 0 when the block was processed, 1 when we should stop
                     :  * walking up (the re-checked reference count is one), or the
                     :  * negative value returned by btrfs_lookup_extent_info().
    7934             :  */
    7935         466 : static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
    7936             :                                  struct btrfs_root *root,
    7937             :                                  struct btrfs_path *path,
    7938             :                                  struct walk_control *wc)
    7939             : {
    7940             :         int ret;
    7941         466 :         int level = wc->level;
    7942         943 :         struct extent_buffer *eb = path->nodes[level];
    7943             :         u64 parent = 0;
    7944             : 
    7945         466 :         if (wc->stage == UPDATE_BACKREF) {
    7946           0 :                 BUG_ON(wc->shared_level < level);
    7947           0 :                 if (level < wc->shared_level)
    7948             :                         goto out;
    7949             :                 /* level == wc->shared_level from here on */
    7950           0 :                 ret = find_next_key(path, level + 1, &wc->update_progress);
    7951           0 :                 if (ret > 0)
    7952           0 :                         wc->update_ref = 0;
    7953             :                 /* switch back to dropping references, restarting at slot 0 */
    7954           0 :                 wc->stage = DROP_REFERENCE;
    7955           0 :                 wc->shared_level = -1;
    7956           0 :                 path->slots[level] = 0;
    7957             : 
    7958             :                 /*
    7959             :                  * check reference count again if the block isn't locked.
    7960             :                  * we should start walking down the tree again if reference
    7961             :                  * count is one.
    7962             :                  */
    7963           0 :                 if (!path->locks[level]) {
    7964           0 :                         BUG_ON(level == 0);
    7965           0 :                         btrfs_tree_lock(eb);
    7966             :                         btrfs_set_lock_blocking(eb);
    7967           0 :                         path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
    7968             : 
    7969           0 :                         ret = btrfs_lookup_extent_info(trans, root,
    7970             :                                                        eb->start, level, 1,
    7971             :                                                        &wc->refs[level],
    7972             :                                                        &wc->flags[level]);
    7973           0 :                         if (ret < 0) { /* drop the lock taken above before failing */
    7974           0 :                                 btrfs_tree_unlock_rw(eb, path->locks[level]);
    7975           0 :                                 path->locks[level] = 0;
    7976           0 :                                 return ret;
    7977             :                         }
    7978           0 :                         BUG_ON(wc->refs[level] == 0);
    7979           0 :                         if (wc->refs[level] == 1) {
    7980           0 :                                 btrfs_tree_unlock_rw(eb, path->locks[level]);
    7981           0 :                                 path->locks[level] = 0;
    7982           0 :                                 return 1;
    7983             :                         }
    7984             :                 }
    7985             :         }
    7986             : 
    7987             :         /* wc->stage == DROP_REFERENCE */
    7988         466 :         BUG_ON(wc->refs[level] > 1 && !path->locks[level]);
    7989             : 
    7990         466 :         if (wc->refs[level] == 1) {
    7991         466 :                 if (level == 0) {
    7992         464 :                         if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
    7993             :                                 ret = btrfs_dec_ref(trans, root, eb, 1);
    7994             :                         else
    7995             :                                 ret = btrfs_dec_ref(trans, root, eb, 0);
    7996         464 :                         BUG_ON(ret); /* -ENOMEM */
    7997         464 :                         ret = account_leaf_items(trans, root, eb);
    7998         464 :                         if (ret) { /* only logged; processing of the block continues */
    7999           0 :                                 printk_ratelimited(KERN_ERR "BTRFS: %s Error "
    8000             :                                         "%d accounting leaf items. Quota "
    8001             :                                         "is out of sync, rescan required.\n",
    8002             :                                         root->fs_info->sb->s_id, ret);
    8003             :                         }
    8004             :                 }
    8005             :                 /* make block locked assertion in clean_tree_block happy */
    8006         932 :                 if (!path->locks[level] &&
    8007         466 :                     btrfs_header_generation(eb) == trans->transid) {
    8008           2 :                         btrfs_tree_lock(eb);
    8009             :                         btrfs_set_lock_blocking(eb);
    8010           2 :                         path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
    8011             :                 }
    8012         466 :                 clean_tree_block(trans, root, eb);
    8013             :         }
    8014             :         /* parent stays 0 unless the relevant block carries FULL_BACKREF */
    8015         466 :         if (eb == root->node) {
    8016         450 :                 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
    8017         439 :                         parent = eb->start;
    8018             :                 else
    8019          22 :                         BUG_ON(root->root_key.objectid !=
    8020             :                                btrfs_header_owner(eb));
    8021             :         } else {
    8022          16 :                 if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
    8023          16 :                         parent = path->nodes[level + 1]->start;
    8024             :                 else
    8025           0 :                         BUG_ON(root->root_key.objectid !=
    8026             :                                btrfs_header_owner(path->nodes[level + 1]));
    8027             :         }
    8028             : 
    8029         466 :         btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1);
    8030             : out:
    8031         466 :         wc->refs[level] = 0;
    8032         466 :         wc->flags[level] = 0;
    8033         466 :         return 0;
    8034             : }
    8035             : /*
                     :  * Walk down the tree starting at wc->level.
                     :  *
                     :  * Each iteration runs walk_down_proc() on the current level and
                     :  * stops when it returns a positive value, when level 0 is reached,
                     :  * or when the current slot is past the last item of the node.
                     :  * Otherwise do_walk_down() is called: a positive return means the
                     :  * child was skipped, so the slot is advanced and the loop retries
                     :  * at the same level; on a zero return the loop continues from
                     :  * wc->level.
                     :  *
                     :  * Returns 0, or the negative value returned by do_walk_down().
                     :  *
                     :  * NOTE(review): the return value of walk_down_proc() is only tested
                     :  * for > 0 here; confirm whether it can return a negative error that
                     :  * should be propagated.
                     :  */
    8036         464 : static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
    8037             :                                    struct btrfs_root *root,
    8038             :                                    struct btrfs_path *path,
    8039             :                                    struct walk_control *wc)
    8040             : {
    8041         464 :         int level = wc->level;
    8042         464 :         int lookup_info = 1;
    8043             :         int ret;
    8044             : 
    8045         946 :         while (level >= 0) {
    8046         482 :                 ret = walk_down_proc(trans, root, path, wc, lookup_info);
    8047         482 :                 if (ret > 0)
    8048             :                         break;
    8049             : 
    8050         482 :                 if (level == 0)
    8051             :                         break;
    8052             : 
    8053          36 :                 if (path->slots[level] >=
    8054          18 :                     btrfs_header_nritems(path->nodes[level]))
    8055             :                         break;
    8056             : 
    8057          18 :                 ret = do_walk_down(trans, root, path, wc, &lookup_info);
    8058          18 :                 if (ret > 0) { /* child skipped: try the next slot */
    8059           2 :                         path->slots[level]++;
    8060           2 :                         continue;
    8061          16 :                 } else if (ret < 0)
    8062             :                         return ret;
    8063          16 :                 level = wc->level;
    8064             :         }
    8065             :         return 0;
    8066             : }
    8067             : /*
                     :  * Walk back up the tree from wc->level, processing each finished
                     :  * block with walk_up_proc() and releasing it from the path.
                     :  *
                     :  * The slot at the starting level is first set to the item count of
                     :  * that node. The loop then runs while level < max_level and the
                     :  * path still has a node at that level.
                     :  *
                     :  * Returns 0 when the caller should walk down again (the slot was
                     :  * advanced to the next sibling, or walk_up_proc() returned a
                     :  * positive value), 1 when the loop ran to completion, or the
                     :  * negative error returned by walk_up_proc().
                     :  */
    8068         464 : static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
    8069             :                                  struct btrfs_root *root,
    8070             :                                  struct btrfs_path *path,
    8071             :                                  struct walk_control *wc, int max_level)
    8072             : {
    8073         464 :         int level = wc->level;
    8074             :         int ret;
    8075             : 
    8076        1408 :         path->slots[level] = btrfs_header_nritems(path->nodes[level]);
    8077        1394 :         while (level < max_level && path->nodes[level]) {
    8078         480 :                 wc->level = level;
    8079         960 :                 if (path->slots[level] + 1 <
    8080         480 :                     btrfs_header_nritems(path->nodes[level])) {
    8081          14 :                         path->slots[level]++;
    8082          14 :                         return 0;
    8083             :                 } else {
    8084         466 :                         ret = walk_up_proc(trans, root, path, wc);
    8085         466 :                         if (ret > 0)
    8086             :                                 return 0;
                     :                         /*
                     :                          * walk_up_proc() returns a negative value when its
                     :                          * extent lookup fails; hand that to the caller
                     :                          * instead of silently continuing the walk.
                     :                          */
                     :                         if (ret < 0)
                     :                                 return ret;
    8087             : 
    8088         466 :                         if (path->locks[level]) {
    8089           2 :                                 btrfs_tree_unlock_rw(path->nodes[level],
    8090             :                                                      path->locks[level]);
    8091           2 :                                 path->locks[level] = 0;
    8092             :                         }
    8093         466 :                         free_extent_buffer(path->nodes[level]);
    8094         466 :                         path->nodes[level] = NULL;
    8095         466 :                         level++;
    8096             :                 }
    8097             :         }
    8098             :         return 1;
    8099             : }
    8100             : 
    8101             : /*
    8102             :  * Drop a subvolume tree.
    8103             :  *
    8104             :  * This function traverses the tree, freeing any blocks that are only
    8105             :  * referenced by the tree.
    8106             :  *
    8107             :  * When a shared tree block is found, this function decreases its
    8108             :  * reference count by one. If update_ref is true, this function
    8109             :  * also makes sure backrefs for the shared block and all lower level
    8110             :  * blocks are properly updated.
    8111             :  *
    8112             :  * If called with for_reloc == 0, may exit early with -EAGAIN.
                     :  *
                     :  * If block_rsv is non-NULL it is installed as the block reserve of
                     :  * every transaction handle started here.
                     :  *
                     :  * Returns 0 once the root has been deleted, or a negative error.
                     :  * Unless for_reloc is set, a root that was not fully dropped is
                     :  * passed to btrfs_add_dead_root() before returning.
    8113             :  */
    8114         450 : int btrfs_drop_snapshot(struct btrfs_root *root,
    8115             :                          struct btrfs_block_rsv *block_rsv, int update_ref,
    8116             :                          int for_reloc)
    8117             : {
    8118             :         struct btrfs_path *path;
    8119             :         struct btrfs_trans_handle *trans;
    8120         450 :         struct btrfs_root *tree_root = root->fs_info->tree_root;
    8121         450 :         struct btrfs_root_item *root_item = &root->root_item;
    8122             :         struct walk_control *wc;
    8123             :         struct btrfs_key key;
    8124             :         int err = 0;
    8125             :         int ret;
    8126             :         int level;
    8127             :         bool root_dropped = false;
    8128             : 
    8129             :         btrfs_debug(root->fs_info, "Drop subvolume %llu", root->objectid);
    8130             : 
    8131         450 :         path = btrfs_alloc_path();
    8132         450 :         if (!path) {
    8133             :                 err = -ENOMEM;
    8134             :                 goto out;
    8135             :         }
    8136             : 
    8137         450 :         wc = kzalloc(sizeof(*wc), GFP_NOFS);
    8138         450 :         if (!wc) {
    8139           0 :                 btrfs_free_path(path);
    8140             :                 err = -ENOMEM;
    8141           0 :                 goto out;
    8142             :         }
    8143             : 
    8144         450 :         trans = btrfs_start_transaction(tree_root, 0);
    8145         450 :         if (IS_ERR(trans)) {
    8146           0 :                 err = PTR_ERR(trans);
    8147           0 :                 goto out_free;
    8148             :         }
    8149             : 
    8150         450 :         if (block_rsv)
    8151         439 :                 trans->block_rsv = block_rsv;
    8152             :         /* objectid 0 in drop_progress: start at the root; else resume from it */
    8153         450 :         if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
    8154         900 :                 level = btrfs_header_level(root->node);
    8155         450 :                 path->nodes[level] = btrfs_lock_root_node(root);
    8156             :                 btrfs_set_lock_blocking(path->nodes[level]);
    8157         450 :                 path->slots[level] = 0;
    8158         450 :                 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
    8159         450 :                 memset(&wc->update_progress, 0,
    8160             :                        sizeof(wc->update_progress));
    8161             :         } else {
    8162             :                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
    8163           0 :                 memcpy(&wc->update_progress, &key,
    8164             :                        sizeof(wc->update_progress));
    8165             : 
    8166           0 :                 level = root_item->drop_level;
    8167           0 :                 BUG_ON(level == 0);
    8168           0 :                 path->lowest_level = level;
    8169           0 :                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
    8170           0 :                 path->lowest_level = 0;
    8171           0 :                 if (ret < 0) {
    8172             :                         err = ret;
    8173             :                         goto out_end_trans;
    8174             :                 }
    8175           0 :                 WARN_ON(ret > 0);
    8176             : 
    8177             :                 /*
    8178             :                  * unlock our path, this is safe because only this
    8179             :                  * function is allowed to delete this snapshot
    8180             :                  */
    8181           0 :                 btrfs_unlock_up_safe(path, 0);
    8182             :                 /* refill wc->refs/flags from the root level down to drop_level */
    8183           0 :                 level = btrfs_header_level(root->node);
    8184             :                 while (1) {
    8185           0 :                         btrfs_tree_lock(path->nodes[level]);
    8186           0 :                         btrfs_set_lock_blocking(path->nodes[level]);
    8187           0 :                         path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
    8188             : 
    8189           0 :                         ret = btrfs_lookup_extent_info(trans, root,
    8190           0 :                                                 path->nodes[level]->start,
    8191             :                                                 level, 1, &wc->refs[level],
    8192             :                                                 &wc->flags[level]);
    8193           0 :                         if (ret < 0) {
    8194             :                                 err = ret;
    8195             :                                 goto out_end_trans;
    8196             :                         }
    8197           0 :                         BUG_ON(wc->refs[level] == 0);
    8198             :                         /* the block at drop_level stays locked for the walk */
    8199           0 :                         if (level == root_item->drop_level)
    8200             :                                 break;
    8201             : 
    8202           0 :                         btrfs_tree_unlock(path->nodes[level]);
    8203           0 :                         path->locks[level] = 0;
    8204           0 :                         WARN_ON(wc->refs[level] != 1);
    8205           0 :                         level--;
    8206           0 :                 }
    8207             :         }
    8208             : 
    8209         450 :         wc->level = level;
    8210         450 :         wc->shared_level = -1;
    8211         450 :         wc->stage = DROP_REFERENCE;
    8212         450 :         wc->update_ref = update_ref;
    8213         450 :         wc->keep_locks = 0;
    8214         450 :         wc->for_reloc = for_reloc;
    8215         450 :         wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
    8216             : 
    8217             :         while (1) {
    8218             : 
    8219         464 :                 ret = walk_down_tree(trans, root, path, wc);
    8220         464 :                 if (ret < 0) {
    8221             :                         err = ret;
    8222             :                         break;
    8223             :                 }
    8224             : 
    8225         464 :                 ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
    8226         464 :                 if (ret < 0) {
    8227             :                         err = ret;
    8228             :                         break;
    8229             :                 }
    8230             :                 /* walk_up_tree() returns > 0 once there is nothing left to walk */
    8231         464 :                 if (ret > 0) {
    8232         450 :                         BUG_ON(wc->stage != DROP_REFERENCE);
    8233             :                         break;
    8234             :                 }
    8235             :                 /* record the current key and level so the drop can be resumed */
    8236          14 :                 if (wc->stage == DROP_REFERENCE) {
    8237          14 :                         level = wc->level;
    8238          14 :                         btrfs_node_key(path->nodes[level],
    8239             :                                        &root_item->drop_progress,
    8240             :                                        path->slots[level]);
    8241          14 :                         root_item->drop_level = level;
    8242             :                 }
    8243             : 
    8244          14 :                 BUG_ON(wc->level == 0);
    8245          14 :                 if (btrfs_should_end_transaction(trans, tree_root) ||
    8246           0 :                     (!for_reloc && btrfs_need_cleaner_sleep(root))) {
    8247           0 :                         ret = btrfs_update_root(trans, tree_root,
    8248             :                                                 &root->root_key,
    8249             :                                                 root_item);
    8250           0 :                         if (ret) {
    8251           0 :                                 btrfs_abort_transaction(trans, tree_root, ret);
    8252             :                                 err = ret;
    8253           0 :                                 goto out_end_trans;
    8254             :                         }
    8255             : 
    8256             :                         /*
    8257             :                          * Qgroup update accounting is run from
    8258             :                          * delayed ref handling. This usually works
    8259             :                          * out because delayed refs are normally the
    8260             :                          * only way qgroup updates are added. However,
    8261             :                          * we may have added updates during our tree
    8262             :                          * walk so run qgroups here to make sure we
    8263             :                          * don't lose any updates.
    8264             :                          */
    8265           0 :                         ret = btrfs_delayed_qgroup_accounting(trans,
    8266             :                                                               root->fs_info);
    8267           0 :                         if (ret)
    8268           0 :                                 printk_ratelimited(KERN_ERR "BTRFS: Failure %d "
    8269             :                                                    "running qgroup updates "
    8270             :                                                    "during snapshot delete. "
    8271             :                                                    "Quota is out of sync, "
    8272             :                                                    "rescan required.\n", ret);
    8273             : 
    8274           0 :                         btrfs_end_transaction_throttle(trans, tree_root);
    8275           0 :                         if (!for_reloc && btrfs_need_cleaner_sleep(root)) {
    8276           0 :                                 pr_debug("BTRFS: drop snapshot early exit\n");
    8277             :                                 err = -EAGAIN;
    8278             :                                 goto out_free;
    8279             :                         }
    8280             : 
    8281           0 :                         trans = btrfs_start_transaction(tree_root, 0);
    8282           0 :                         if (IS_ERR(trans)) {
    8283           0 :                                 err = PTR_ERR(trans);
    8284           0 :                                 goto out_free;
    8285             :                         }
    8286           0 :                         if (block_rsv)
    8287           0 :                                 trans->block_rsv = block_rsv;
    8288             :                 }
    8289             :         }
    8290         450 :         btrfs_release_path(path);
    8291         450 :         if (err)
    8292             :                 goto out_end_trans;
    8293             : 
    8294         450 :         ret = btrfs_del_root(trans, tree_root, &root->root_key);
    8295         450 :         if (ret) {
    8296           0 :                 btrfs_abort_transaction(trans, tree_root, ret);
                     :                 /* report the failure instead of returning 0 */
                     :                 err = ret;
    8297           0 :                 goto out_end_trans;
    8298             :         }
    8299             : 
    8300         450 :         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
    8301          11 :                 ret = btrfs_find_root(tree_root, &root->root_key, path,
    8302             :                                       NULL, NULL);
    8303          11 :                 if (ret < 0) {
    8304           0 :                         btrfs_abort_transaction(trans, tree_root, ret);
    8305             :                         err = ret;
    8306           0 :                         goto out_end_trans;
    8307          11 :                 } else if (ret > 0) {
    8308             :                         /* if we fail to delete the orphan item this time
    8309             :                          * around, it'll get picked up the next time.
    8310             :                          *
    8311             :                          * The most common failure here is just -ENOENT.
    8312             :                          */
    8313          11 :                         btrfs_del_orphan_item(trans, tree_root,
    8314             :                                               root->root_key.objectid);
    8315             :                 }
    8316             :         }
    8317             : 
    8318         450 :         if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state)) {
    8319          11 :                 btrfs_drop_and_free_fs_root(tree_root->fs_info, root);
    8320             :         } else {
    8321         439 :                 free_extent_buffer(root->node);
    8322         439 :                 free_extent_buffer(root->commit_root);
    8323         439 :                 btrfs_put_fs_root(root);
    8324             :         }
    8325             :         root_dropped = true;
    8326             : out_end_trans:
    8327         450 :         ret = btrfs_delayed_qgroup_accounting(trans, tree_root->fs_info);
    8328         450 :         if (ret)
    8329           0 :                 printk_ratelimited(KERN_ERR "BTRFS: Failure %d "
    8330             :                                    "running qgroup updates "
    8331             :                                    "during snapshot delete. "
    8332             :                                    "Quota is out of sync, "
    8333             :                                    "rescan required.\n", ret);
    8334             : 
    8335         450 :         btrfs_end_transaction_throttle(trans, tree_root);
    8336             : out_free:
    8337         450 :         kfree(wc);
    8338         450 :         btrfs_free_path(path);
    8339             : out:
    8340             :         /*
    8341             :          * So if we need to stop dropping the snapshot for whatever reason we
    8342             :          * need to make sure to add it back to the dead root list so that we
    8343             :          * keep trying to do the work later.  This also cleans up roots if we
    8344             :          * don't have it in the radix (like when we recover after a power fail
    8345             :          * or unmount) so we don't leak memory.
    8346             :          */
    8347         450 :         if (!for_reloc && root_dropped == false)
    8348           0 :                 btrfs_add_dead_root(root);
    8349         450 :         if (err && err != -EAGAIN)
    8350           0 :                 btrfs_std_error(root->fs_info, err);
    8351         450 :         return err;
    8352             : }
    8353             : 
    8354             : /*
    8355             :  * drop subtree rooted at tree block 'node'.
    8356             :  *
    8357             :  * NOTE: this function will unlock and release tree block 'node'.
    8358             :  * It is only used by relocation code.
    8359             :  */
    8360           0 : int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
    8361             :                         struct btrfs_root *root,
    8362           0 :                         struct extent_buffer *node,
    8363           0 :                         struct extent_buffer *parent)
    8364             : {
    8365             :         struct btrfs_path *path;
    8366             :         struct walk_control *wc;
    8367             :         int level;
    8368             :         int parent_level;
    8369             :         int ret = 0;
    8370             :         int wret;
    8371             : 
    8372           0 :         BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
    8373             : 
    8374           0 :         path = btrfs_alloc_path();
    8375           0 :         if (!path)
    8376             :                 return -ENOMEM;
    8377             : 
    8378           0 :         wc = kzalloc(sizeof(*wc), GFP_NOFS);
    8379           0 :         if (!wc) {
    8380           0 :                 btrfs_free_path(path);
    8381           0 :                 return -ENOMEM;
    8382             :         }
    8383             : 
    8384           0 :         btrfs_assert_tree_locked(parent);
    8385           0 :         parent_level = btrfs_header_level(parent);
    8386             :         extent_buffer_get(parent);
    8387           0 :         path->nodes[parent_level] = parent;
    8388           0 :         path->slots[parent_level] = btrfs_header_nritems(parent);
    8389             : 
    8390           0 :         btrfs_assert_tree_locked(node);
    8391           0 :         level = btrfs_header_level(node);
    8392           0 :         path->nodes[level] = node;
    8393           0 :         path->slots[level] = 0;
    8394           0 :         path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
    8395             : 
    8396           0 :         wc->refs[parent_level] = 1;
    8397           0 :         wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
    8398           0 :         wc->level = level;
    8399           0 :         wc->shared_level = -1;
    8400           0 :         wc->stage = DROP_REFERENCE;
    8401           0 :         wc->update_ref = 0;
    8402           0 :         wc->keep_locks = 1;
    8403           0 :         wc->for_reloc = 1;
    8404           0 :         wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
    8405             : 
    8406             :         while (1) {
    8407           0 :                 wret = walk_down_tree(trans, root, path, wc);
    8408           0 :                 if (wret < 0) {
    8409             :                         ret = wret;
    8410             :                         break;
    8411             :                 }
    8412             : 
    8413           0 :                 wret = walk_up_tree(trans, root, path, wc, parent_level);
    8414           0 :                 if (wret < 0)
    8415             :                         ret = wret;
    8416           0 :                 if (wret != 0)
    8417             :                         break;
    8418             :         }
    8419             : 
    8420           0 :         kfree(wc);
    8421           0 :         btrfs_free_path(path);
    8422           0 :         return ret;
    8423             : }
    8424             : 
    8425          66 : static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
    8426             : {
    8427             :         u64 num_devices;
    8428             :         u64 stripped;
    8429             : 
    8430             :         /*
    8431             :          * if restripe for this chunk_type is on pick target profile and
    8432             :          * return, otherwise do the usual balance
    8433             :          */
    8434          66 :         stripped = get_restripe_target(root->fs_info, flags);
    8435          66 :         if (stripped)
    8436             :                 return extended_to_chunk(stripped);
    8437             : 
    8438          66 :         num_devices = root->fs_info->fs_devices->rw_devices;
    8439             : 
    8440             :         stripped = BTRFS_BLOCK_GROUP_RAID0 |
    8441             :                 BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 |
    8442             :                 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
    8443             : 
    8444          66 :         if (num_devices == 1) {
    8445             :                 stripped |= BTRFS_BLOCK_GROUP_DUP;
    8446          66 :                 stripped = flags & ~stripped;
    8447             : 
    8448             :                 /* turn raid0 into single device chunks */
    8449          66 :                 if (flags & BTRFS_BLOCK_GROUP_RAID0)
    8450             :                         return stripped;
    8451             : 
    8452             :                 /* turn mirroring into duplication */
    8453          66 :                 if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
    8454             :                              BTRFS_BLOCK_GROUP_RAID10))
    8455           0 :                         return stripped | BTRFS_BLOCK_GROUP_DUP;
    8456             :         } else {
    8457             :                 /* they already had raid on here, just return */
    8458           0 :                 if (flags & stripped)
    8459             :                         return flags;
    8460             : 
    8461             :                 stripped |= BTRFS_BLOCK_GROUP_DUP;
    8462           0 :                 stripped = flags & ~stripped;
    8463             : 
    8464             :                 /* switch duplicated blocks with raid1 */
    8465           0 :                 if (flags & BTRFS_BLOCK_GROUP_DUP)
    8466           0 :                         return stripped | BTRFS_BLOCK_GROUP_RAID1;
    8467             : 
    8468             :                 /* this is drive concat, leave it alone */
    8469             :         }
    8470             : 
    8471             :         return flags;
    8472             : }
    8473             : 
    8474         532 : static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force)
    8475             : {
    8476         532 :         struct btrfs_space_info *sinfo = cache->space_info;
    8477             :         u64 num_bytes;
    8478             :         u64 min_allocable_bytes;
    8479             :         int ret = -ENOSPC;
    8480             : 
    8481             : 
    8482             :         /*
    8483             :          * We need some metadata space and system metadata space for
    8484             :          * allocating chunks in some corner cases until we force to set
    8485             :          * it to be readonly.
    8486             :          */
    8487         532 :         if ((sinfo->flags &
    8488         502 :              (BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_METADATA)) &&
    8489             :             !force)
    8490             :                 min_allocable_bytes = 1 * 1024 * 1024;
    8491             :         else
    8492             :                 min_allocable_bytes = 0;
    8493             : 
    8494             :         spin_lock(&sinfo->lock);
    8495             :         spin_lock(&cache->lock);
    8496             : 
    8497         532 :         if (cache->ro) {
    8498             :                 ret = 0;
    8499             :                 goto out;
    8500             :         }
    8501             : 
    8502        1596 :         num_bytes = cache->key.offset - cache->reserved - cache->pinned -
    8503         532 :                     cache->bytes_super - btrfs_block_group_used(&cache->item);
    8504             : 
    8505        1596 :         if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned +
    8506        1064 :             sinfo->bytes_may_use + sinfo->bytes_readonly + num_bytes +
    8507         532 :             min_allocable_bytes <= sinfo->total_bytes) {
    8508         488 :                 sinfo->bytes_readonly += num_bytes;
    8509         488 :                 cache->ro = 1;
    8510             :                 ret = 0;
    8511             :         }
    8512             : out:
    8513             :         spin_unlock(&cache->lock);
    8514             :         spin_unlock(&sinfo->lock);
    8515         532 :         return ret;
    8516             : }
    8517             : 
    8518         132 : int btrfs_set_block_group_ro(struct btrfs_root *root,
    8519             :                              struct btrfs_block_group_cache *cache)
    8520             : 
    8521             : {
    8522             :         struct btrfs_trans_handle *trans;
    8523             :         u64 alloc_flags;
    8524             :         int ret;
    8525             : 
    8526          66 :         BUG_ON(cache->ro);
    8527             : 
    8528          66 :         trans = btrfs_join_transaction(root);
    8529          66 :         if (IS_ERR(trans))
    8530           0 :                 return PTR_ERR(trans);
    8531             : 
    8532         132 :         alloc_flags = update_block_group_flags(root, cache->flags);
    8533          66 :         if (alloc_flags != cache->flags) {
    8534           0 :                 ret = do_chunk_alloc(trans, root, alloc_flags,
    8535             :                                      CHUNK_ALLOC_FORCE);
    8536           0 :                 if (ret < 0)
    8537             :                         goto out;
    8538             :         }
    8539             : 
    8540          66 :         ret = set_block_group_ro(cache, 0);
    8541          66 :         if (!ret)
    8542             :                 goto out;
    8543          44 :         alloc_flags = get_alloc_profile(root, cache->space_info->flags);
    8544          44 :         ret = do_chunk_alloc(trans, root, alloc_flags,
    8545             :                              CHUNK_ALLOC_FORCE);
    8546          44 :         if (ret < 0)
    8547             :                 goto out;
    8548          44 :         ret = set_block_group_ro(cache, 0);
    8549             : out:
    8550          66 :         btrfs_end_transaction(trans, root);
    8551          66 :         return ret;
    8552             : }
    8553             : 
    8554           0 : int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
    8555             :                             struct btrfs_root *root, u64 type)
    8556             : {
    8557           0 :         u64 alloc_flags = get_alloc_profile(root, type);
    8558           0 :         return do_chunk_alloc(trans, root, alloc_flags,
    8559             :                               CHUNK_ALLOC_FORCE);
    8560             : }
    8561             : 
    8562             : /*
    8563             :  * helper to account the unused space of all the readonly block group in the
    8564             :  * list. takes mirrors into account.
    8565             :  */
    8566       48484 : static u64 __btrfs_get_ro_block_group_free_space(struct list_head *groups_list)
    8567             : {
    8568             :         struct btrfs_block_group_cache *block_group;
    8569             :         u64 free_bytes = 0;
    8570             :         int factor;
    8571             : 
    8572      139400 :         list_for_each_entry(block_group, groups_list, list) {
    8573             :                 spin_lock(&block_group->lock);
    8574             : 
    8575       90916 :                 if (!block_group->ro) {
    8576             :                         spin_unlock(&block_group->lock);
    8577       90916 :                         continue;
    8578             :                 }
    8579             : 
    8580           0 :                 if (block_group->flags & (BTRFS_BLOCK_GROUP_RAID1 |
    8581             :                                           BTRFS_BLOCK_GROUP_RAID10 |
    8582             :                                           BTRFS_BLOCK_GROUP_DUP))
    8583             :                         factor = 2;
    8584             :                 else
    8585             :                         factor = 1;
    8586             : 
    8587           0 :                 free_bytes += (block_group->key.offset -
    8588           0 :                                btrfs_block_group_used(&block_group->item)) *
    8589             :                                factor;
    8590             : 
    8591             :                 spin_unlock(&block_group->lock);
    8592             :         }
    8593             : 
    8594       48484 :         return free_bytes;
    8595             : }
    8596             : 
    8597             : /*
    8598             :  * helper to account the unused space of all the readonly block group in the
    8599             :  * space_info. takes mirrors into account.
    8600             :  */
    8601       48481 : u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
    8602             : {
    8603             :         int i;
    8604             :         u64 free_bytes = 0;
    8605             : 
    8606             :         spin_lock(&sinfo->lock);
    8607             : 
    8608      387848 :         for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
    8609      678734 :                 if (!list_empty(&sinfo->block_groups[i]))
    8610       48484 :                         free_bytes += __btrfs_get_ro_block_group_free_space(
    8611             :                                                 &sinfo->block_groups[i]);
    8612             : 
    8613             :         spin_unlock(&sinfo->lock);
    8614             : 
    8615       48481 :         return free_bytes;
    8616             : }
    8617             : 
    8618           0 : void btrfs_set_block_group_rw(struct btrfs_root *root,
    8619             :                               struct btrfs_block_group_cache *cache)
    8620             : {
    8621           0 :         struct btrfs_space_info *sinfo = cache->space_info;
    8622             :         u64 num_bytes;
    8623             : 
    8624           0 :         BUG_ON(!cache->ro);
    8625             : 
    8626             :         spin_lock(&sinfo->lock);
    8627             :         spin_lock(&cache->lock);
    8628           0 :         num_bytes = cache->key.offset - cache->reserved - cache->pinned -
    8629           0 :                     cache->bytes_super - btrfs_block_group_used(&cache->item);
    8630           0 :         sinfo->bytes_readonly -= num_bytes;
    8631           0 :         cache->ro = 0;
    8632             :         spin_unlock(&cache->lock);
    8633             :         spin_unlock(&sinfo->lock);
    8634           0 : }
    8635             : 
    8636             : /*
    8637             :  * checks to see if its even possible to relocate this block group.
    8638             :  *
    8639             :  * @return - -1 if it's not a good idea to relocate this block group, 0 if its
    8640             :  * ok to go ahead and try.
    8641             :  */
    8642          72 : int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
    8643             : {
    8644             :         struct btrfs_block_group_cache *block_group;
    8645             :         struct btrfs_space_info *space_info;
    8646         132 :         struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
    8647             :         struct btrfs_device *device;
    8648             :         struct btrfs_trans_handle *trans;
    8649             :         u64 min_free;
    8650             :         u64 dev_min = 1;
    8651             :         u64 dev_nr = 0;
    8652             :         u64 target;
    8653             :         int index;
    8654             :         int full = 0;
    8655             :         int ret = 0;
    8656             : 
    8657             :         block_group = btrfs_lookup_block_group(root->fs_info, bytenr);
    8658             : 
    8659             :         /* odd, couldn't find the block group, leave it alone */
    8660          72 :         if (!block_group)
    8661             :                 return -1;
    8662             : 
    8663             :         min_free = btrfs_block_group_used(&block_group->item);
    8664             : 
    8665             :         /* no bytes used, we're good */
    8666          72 :         if (!min_free)
    8667             :                 goto out;
    8668             : 
    8669          66 :         space_info = block_group->space_info;
    8670             :         spin_lock(&space_info->lock);
    8671             : 
    8672          66 :         full = space_info->full;
    8673             : 
    8674             :         /*
    8675             :          * if this is the last block group we have in this space, we can't
    8676             :          * relocate it unless we're able to allocate a new chunk below.
    8677             :          *
    8678             :          * Otherwise, we need to make sure we have room in the space to handle
    8679             :          * all of the extents from this block group.  If we can, we're good
    8680             :          */
    8681          72 :         if ((space_info->total_bytes != block_group->key.offset) &&
    8682          12 :             (space_info->bytes_used + space_info->bytes_reserved +
    8683          12 :              space_info->bytes_pinned + space_info->bytes_readonly +
    8684             :              min_free < space_info->total_bytes)) {
    8685             :                 spin_unlock(&space_info->lock);
    8686             :                 goto out;
    8687             :         }
    8688             :         spin_unlock(&space_info->lock);
    8689             : 
    8690             :         /*
    8691             :          * ok we don't have enough space, but maybe we have free space on our
    8692             :          * devices to allocate new chunks for relocation, so loop through our
    8693             :          * alloc devices and guess if we have enough space.  if this block
    8694             :          * group is going to be restriped, run checks against the target
    8695             :          * profile instead of the current one.
    8696             :          */
    8697             :         ret = -1;
    8698             : 
    8699             :         /*
    8700             :          * index:
    8701             :          *      0: raid10
    8702             :          *      1: raid1
    8703             :          *      2: dup
    8704             :          *      3: raid0
    8705             :          *      4: single
    8706             :          */
    8707         120 :         target = get_restripe_target(root->fs_info, block_group->flags);
    8708          60 :         if (target) {
    8709           0 :                 index = __get_raid_index(extended_to_chunk(target));
    8710             :         } else {
    8711             :                 /*
    8712             :                  * this is just a balance, so if we were marked as full
    8713             :                  * we know there is no space for a new chunk
    8714             :                  */
    8715          60 :                 if (full)
    8716             :                         goto out;
    8717             : 
    8718             :                 index = get_block_group_index(block_group);
    8719             :         }
    8720             : 
    8721          60 :         if (index == BTRFS_RAID_RAID10) {
    8722             :                 dev_min = 4;
    8723             :                 /* Divide by 2 */
    8724           0 :                 min_free >>= 1;
    8725          60 :         } else if (index == BTRFS_RAID_RAID1) {
    8726             :                 dev_min = 2;
    8727          60 :         } else if (index == BTRFS_RAID_DUP) {
    8728             :                 /* Multiply by 2 */
    8729          38 :                 min_free <<= 1;
    8730          22 :         } else if (index == BTRFS_RAID_RAID0) {
    8731           0 :                 dev_min = fs_devices->rw_devices;
    8732           0 :                 do_div(min_free, dev_min);
    8733             :         }
    8734             : 
    8735             :         /* We need to do this so that we can look at pending chunks */
    8736          60 :         trans = btrfs_join_transaction(root);
    8737          60 :         if (IS_ERR(trans)) {
    8738           0 :                 ret = PTR_ERR(trans);
    8739           0 :                 goto out;
    8740             :         }
    8741             : 
    8742          60 :         mutex_lock(&root->fs_info->chunk_mutex);
    8743          60 :         list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
    8744             :                 u64 dev_offset;
    8745             : 
    8746             :                 /*
    8747             :                  * check to make sure we can actually find a chunk with enough
    8748             :                  * space to fit our block group in.
    8749             :                  */
    8750         120 :                 if (device->total_bytes > device->bytes_used + min_free &&
    8751          60 :                     !device->is_tgtdev_for_dev_replace) {
    8752          60 :                         ret = find_free_dev_extent(trans, device, min_free,
    8753             :                                                    &dev_offset, NULL);
    8754          60 :                         if (!ret)
    8755          60 :                                 dev_nr++;
    8756             : 
    8757          60 :                         if (dev_nr >= dev_min)
    8758             :                                 break;
    8759             : 
    8760             :                         ret = -1;
    8761             :                 }
    8762             :         }
    8763          60 :         mutex_unlock(&root->fs_info->chunk_mutex);
    8764          60 :         btrfs_end_transaction(trans, root);
    8765             : out:
    8766          72 :         btrfs_put_block_group(block_group);
    8767          72 :         return ret;
    8768             : }
    8769             : 
    8770        1362 : static int find_first_block_group(struct btrfs_root *root,
    8771             :                 struct btrfs_path *path, struct btrfs_key *key)
    8772             : {
    8773             :         int ret = 0;
    8774             :         struct btrfs_key found_key;
    8775        1364 :         struct extent_buffer *leaf;
    8776             :         int slot;
    8777             : 
    8778        1362 :         ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
    8779        1362 :         if (ret < 0)
    8780             :                 goto out;
    8781             : 
    8782             :         while (1) {
    8783        1364 :                 slot = path->slots[0];
    8784        1364 :                 leaf = path->nodes[0];
    8785        2728 :                 if (slot >= btrfs_header_nritems(leaf)) {
    8786         222 :                         ret = btrfs_next_leaf(root, path);
    8787         222 :                         if (ret == 0)
    8788           1 :                                 continue;
    8789             :                         if (ret < 0)
    8790             :                                 goto out;
    8791             :                         break;
    8792             :                 }
    8793        1142 :                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
    8794             : 
    8795        2284 :                 if (found_key.objectid >= key->objectid &&
    8796        1142 :                     found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
    8797             :                         ret = 0;
    8798             :                         goto out;
    8799             :                 }
    8800           1 :                 path->slots[0]++;
    8801             :         }
    8802             : out:
    8803        1362 :         return ret;
    8804             : }
    8805             : 
    8806         221 : void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
    8807             : {
    8808             :         struct btrfs_block_group_cache *block_group;
    8809             :         u64 last = 0;
    8810             : 
    8811             :         while (1) {
    8812             :                 struct inode *inode;
    8813             : 
    8814             :                 block_group = btrfs_lookup_first_block_group(info, last);
    8815        3281 :                 while (block_group) {
    8816             :                         spin_lock(&block_group->lock);
    8817        2177 :                         if (block_group->iref)
    8818             :                                 break;
    8819             :                         spin_unlock(&block_group->lock);
    8820        1903 :                         block_group = next_block_group(info->tree_root,
    8821             :                                                        block_group);
    8822             :                 }
    8823         689 :                 if (!block_group) {
    8824         415 :                         if (last == 0)
    8825             :                                 break;
    8826             :                         last = 0;
    8827         194 :                         continue;
    8828             :                 }
    8829             : 
    8830         274 :                 inode = block_group->inode;
    8831         274 :                 block_group->iref = 0;
    8832         274 :                 block_group->inode = NULL;
    8833             :                 spin_unlock(&block_group->lock);
    8834         274 :                 iput(inode);
    8835         274 :                 last = block_group->key.objectid + block_group->key.offset;
    8836         274 :                 btrfs_put_block_group(block_group);
    8837             :         }
    8838         221 : }
    8839             : 
    8840         221 : int btrfs_free_block_groups(struct btrfs_fs_info *info)
    8841             : {
    8842             :         struct btrfs_block_group_cache *block_group;
    8843             :         struct btrfs_space_info *space_info;
    8844             :         struct btrfs_caching_control *caching_ctl;
    8845             :         struct rb_node *n;
    8846             : 
    8847         221 :         down_write(&info->commit_root_sem);
    8848         663 :         while (!list_empty(&info->caching_block_groups)) {
    8849             :                 caching_ctl = list_entry(info->caching_block_groups.next,
    8850             :                                          struct btrfs_caching_control, list);
    8851           0 :                 list_del(&caching_ctl->list);
    8852           0 :                 put_caching_control(caching_ctl);
    8853             :         }
    8854         221 :         up_write(&info->commit_root_sem);
    8855             : 
    8856             :         spin_lock(&info->block_group_cache_lock);
    8857        1377 :         while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
    8858        1156 :                 block_group = rb_entry(n, struct btrfs_block_group_cache,
    8859             :                                        cache_node);
    8860        1156 :                 rb_erase(&block_group->cache_node,
    8861             :                          &info->block_group_cache_tree);
    8862             :                 spin_unlock(&info->block_group_cache_lock);
    8863             : 
    8864        1156 :                 down_write(&block_group->space_info->groups_sem);
    8865        1156 :                 list_del(&block_group->list);
    8866        1156 :                 up_write(&block_group->space_info->groups_sem);
    8867             : 
    8868        1156 :                 if (block_group->cached == BTRFS_CACHE_STARTED)
    8869           0 :                         wait_block_group_cache_done(block_group);
    8870             : 
    8871             :                 /*
    8872             :                  * We haven't cached this block group, which means we could
    8873             :                  * possibly have excluded extents on this block group.
    8874             :                  */
    8875        1156 :                 if (block_group->cached == BTRFS_CACHE_NO ||
    8876             :                     block_group->cached == BTRFS_CACHE_ERROR)
    8877         260 :                         free_excluded_extents(info->extent_root, block_group);
    8878             : 
    8879        1156 :                 btrfs_remove_free_space_cache(block_group);
    8880        1156 :                 btrfs_put_block_group(block_group);
    8881             : 
    8882             :                 spin_lock(&info->block_group_cache_lock);
    8883             :         }
    8884             :         spin_unlock(&info->block_group_cache_lock);
    8885             : 
    8886             :         /* now that all the block groups are freed, go through and
    8887             :          * free all the space_info structs.  This is only called during
    8888             :          * the final stages of unmount, and so we know nobody is
    8889             :          * using them.  We call synchronize_rcu() once before we start,
    8890             :          * just to be on the safe side.
    8891             :          */
    8892             :         synchronize_rcu();
    8893             : 
    8894         221 :         release_global_block_rsv(info);
    8895             : 
    8896        1977 :         while (!list_empty(&info->space_info)) {
    8897             :                 int i;
    8898             : 
    8899         657 :                 space_info = list_entry(info->space_info.next,
    8900             :                                         struct btrfs_space_info,
    8901             :                                         list);
    8902         657 :                 if (btrfs_test_opt(info->tree_root, ENOSPC_DEBUG)) {
    8903           0 :                         if (WARN_ON(space_info->bytes_pinned > 0 ||
    8904             :                             space_info->bytes_reserved > 0 ||
    8905             :                             space_info->bytes_may_use > 0)) {
    8906           0 :                                 dump_space_info(space_info, 0, 0);
    8907             :                         }
    8908             :                 }
    8909         657 :                 list_del(&space_info->list);
    8910        5256 :                 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
    8911             :                         struct kobject *kobj;
    8912        4599 :                         kobj = space_info->block_group_kobjs[i];
    8913        4599 :                         space_info->block_group_kobjs[i] = NULL;
    8914        4599 :                         if (kobj) {
    8915        1084 :                                 kobject_del(kobj);
    8916        1084 :                                 kobject_put(kobj);
    8917             :                         }
    8918             :                 }
    8919         657 :                 kobject_del(&space_info->kobj);
    8920         657 :                 kobject_put(&space_info->kobj);
    8921             :         }
    8922         221 :         return 0;
    8923             : }
    8924             : 
    8925        1228 : static void __link_block_group(struct btrfs_space_info *space_info,
    8926             :                                struct btrfs_block_group_cache *cache)
    8927             : {
    8928             :         int index = get_block_group_index(cache);
    8929             :         bool first = false;
    8930             : 
    8931        1228 :         down_write(&space_info->groups_sem);
    8932        2456 :         if (list_empty(&space_info->block_groups[index]))
    8933             :                 first = true;
    8934        1228 :         list_add_tail(&cache->list, &space_info->block_groups[index]);
    8935        1228 :         up_write(&space_info->groups_sem);
    8936             : 
    8937        1228 :         if (first) {
    8938             :                 struct raid_kobject *rkobj;
    8939             :                 int ret;
    8940             : 
    8941        1090 :                 rkobj = kzalloc(sizeof(*rkobj), GFP_NOFS);
    8942        1090 :                 if (!rkobj)
    8943             :                         goto out_err;
    8944        1090 :                 rkobj->raid_type = index;
    8945        1090 :                 kobject_init(&rkobj->kobj, &btrfs_raid_ktype);
    8946        2180 :                 ret = kobject_add(&rkobj->kobj, &space_info->kobj,
    8947             :                                   "%s", get_raid_name(index));
    8948        1090 :                 if (ret) {
    8949           0 :                         kobject_put(&rkobj->kobj);
    8950           0 :                         goto out_err;
    8951             :                 }
    8952        1090 :                 space_info->block_group_kobjs[index] = &rkobj->kobj;
    8953             :         }
    8954             : 
    8955        1228 :         return;
    8956             : out_err:
    8957           0 :         pr_warn("BTRFS: failed to add kobject for block cache. ignoring.\n");
    8958             : }
    8959             : 
    8960             : static struct btrfs_block_group_cache *
    8961        1228 : btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
    8962             : {
    8963             :         struct btrfs_block_group_cache *cache;
    8964             : 
    8965        1228 :         cache = kzalloc(sizeof(*cache), GFP_NOFS);
    8966        1228 :         if (!cache)
    8967             :                 return NULL;
    8968             : 
    8969        1228 :         cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
    8970             :                                         GFP_NOFS);
    8971        1228 :         if (!cache->free_space_ctl) {
    8972           0 :                 kfree(cache);
    8973           0 :                 return NULL;
    8974             :         }
    8975             : 
    8976        1228 :         cache->key.objectid = start;
    8977        1228 :         cache->key.offset = size;
    8978        1228 :         cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
    8979             : 
    8980        1228 :         cache->sectorsize = root->sectorsize;
    8981        1228 :         cache->fs_info = root->fs_info;
    8982        1228 :         cache->full_stripe_len = btrfs_full_stripe_len(root,
    8983        1228 :                                                &root->fs_info->mapping_tree,
    8984             :                                                start);
    8985             :         atomic_set(&cache->count, 1);
    8986        1228 :         spin_lock_init(&cache->lock);
    8987        1228 :         init_rwsem(&cache->data_rwsem);
    8988        1228 :         INIT_LIST_HEAD(&cache->list);
    8989        1228 :         INIT_LIST_HEAD(&cache->cluster_list);
    8990        1228 :         INIT_LIST_HEAD(&cache->new_bg_list);
    8991        1228 :         btrfs_init_free_space_ctl(cache);
    8992             : 
    8993        1228 :         return cache;
    8994             : }
    8995             : 
    8996         221 : int btrfs_read_block_groups(struct btrfs_root *root)
    8997             : {
    8998             :         struct btrfs_path *path;
    8999             :         int ret;
    9000             :         struct btrfs_block_group_cache *cache;
    9001         221 :         struct btrfs_fs_info *info = root->fs_info;
    9002             :         struct btrfs_space_info *space_info;
    9003             :         struct btrfs_key key;
    9004             :         struct btrfs_key found_key;
    9005             :         struct extent_buffer *leaf;
    9006             :         int need_clear = 0;
    9007             :         u64 cache_gen;
    9008             : 
    9009         221 :         root = info->extent_root;
    9010         221 :         key.objectid = 0;
    9011         221 :         key.offset = 0;
    9012             :         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
    9013         221 :         path = btrfs_alloc_path();
    9014         221 :         if (!path)
    9015             :                 return -ENOMEM;
    9016         221 :         path->reada = 1;
    9017             : 
    9018         221 :         cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy);
    9019         442 :         if (btrfs_test_opt(root, SPACE_CACHE) &&
    9020             :             btrfs_super_generation(root->fs_info->super_copy) != cache_gen)
    9021             :                 need_clear = 1;
    9022         221 :         if (btrfs_test_opt(root, CLEAR_CACHE))
    9023             :                 need_clear = 1;
    9024             : 
    9025             :         while (1) {
    9026        1362 :                 ret = find_first_block_group(root, path, &key);
    9027        1362 :                 if (ret > 0)
    9028             :                         break;
    9029        1141 :                 if (ret != 0)
    9030             :                         goto error;
    9031             : 
    9032        1141 :                 leaf = path->nodes[0];
    9033        1141 :                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
    9034             : 
    9035        1141 :                 cache = btrfs_create_block_group_cache(root, found_key.objectid,
    9036             :                                                        found_key.offset);
    9037        1141 :                 if (!cache) {
    9038             :                         ret = -ENOMEM;
    9039             :                         goto error;
    9040             :                 }
    9041             : 
    9042        1141 :                 if (need_clear) {
    9043             :                         /*
    9044             :                          * When we mount with old space cache, we need to
    9045             :                          * set BTRFS_DC_CLEAR and set dirty flag.
    9046             :                          *
    9047             :                          * a) Setting 'BTRFS_DC_CLEAR' makes sure that we
    9048             :                          *    truncate the old free space cache inode and
    9049             :                          *    setup a new one.
    9050             :                          * b) Setting 'dirty flag' makes sure that we flush
    9051             :                          *    the new space cache info onto disk.
    9052             :                          */
    9053         502 :                         cache->disk_cache_state = BTRFS_DC_CLEAR;
    9054         502 :                         if (btrfs_test_opt(root, SPACE_CACHE))
    9055         502 :                                 cache->dirty = 1;
    9056             :                 }
    9057             : 
    9058        1141 :                 read_extent_buffer(leaf, &cache->item,
    9059        1141 :                                    btrfs_item_ptr_offset(leaf, path->slots[0]),
    9060             :                                    sizeof(cache->item));
    9061        1141 :                 cache->flags = btrfs_block_group_flags(&cache->item);
    9062             : 
    9063        1141 :                 key.objectid = found_key.objectid + found_key.offset;
    9064        1141 :                 btrfs_release_path(path);
    9065             : 
    9066             :                 /*
    9067             :                  * We need to exclude the super stripes now so that the space
    9068             :                  * info has super bytes accounted for, otherwise we'll think
    9069             :                  * we have more space than we actually do.
    9070             :                  */
    9071        1141 :                 ret = exclude_super_stripes(root, cache);
    9072        1141 :                 if (ret) {
    9073             :                         /*
    9074             :                          * We may have excluded something, so call this just in
    9075             :                          * case.
    9076             :                          */
    9077           0 :                         free_excluded_extents(root, cache);
    9078           0 :                         btrfs_put_block_group(cache);
    9079           0 :                         goto error;
    9080             :                 }
    9081             : 
    9082             :                 /*
    9083             :                  * check for two cases, either we are full, and therefore
    9084             :                  * don't need to bother with the caching work since we won't
    9085             :                  * find any space, or we are empty, and we can just add all
    9086             :                  * the space in and be done with it.  This saves us _alot_ of
    9087             :                  * time, particularly in the full case.
    9088             :                  */
    9089        2282 :                 if (found_key.offset == btrfs_block_group_used(&cache->item)) {
    9090           7 :                         cache->last_byte_to_unpin = (u64)-1;
    9091           7 :                         cache->cached = BTRFS_CACHE_FINISHED;
    9092           7 :                         free_excluded_extents(root, cache);
    9093        1134 :                 } else if (btrfs_block_group_used(&cache->item) == 0) {
    9094         533 :                         cache->last_byte_to_unpin = (u64)-1;
    9095         533 :                         cache->cached = BTRFS_CACHE_FINISHED;
    9096         533 :                         add_new_free_space(cache, root->fs_info,
    9097             :                                            found_key.objectid,
    9098         533 :                                            found_key.objectid +
    9099             :                                            found_key.offset);
    9100         533 :                         free_excluded_extents(root, cache);
    9101             :                 }
    9102             : 
    9103        1141 :                 ret = btrfs_add_block_group_cache(root->fs_info, cache);
    9104        1141 :                 if (ret) {
    9105           0 :                         btrfs_remove_free_space_cache(cache);
    9106           0 :                         btrfs_put_block_group(cache);
    9107           0 :                         goto error;
    9108             :                 }
    9109             : 
    9110        1141 :                 ret = update_space_info(info, cache->flags, found_key.offset,
    9111             :                                         btrfs_block_group_used(&cache->item),
    9112             :                                         &space_info);
    9113        1141 :                 if (ret) {
    9114           0 :                         btrfs_remove_free_space_cache(cache);
    9115             :                         spin_lock(&info->block_group_cache_lock);
    9116           0 :                         rb_erase(&cache->cache_node,
    9117             :                                  &info->block_group_cache_tree);
    9118             :                         spin_unlock(&info->block_group_cache_lock);
    9119           0 :                         btrfs_put_block_group(cache);
    9120           0 :                         goto error;
    9121             :                 }
    9122             : 
    9123        1141 :                 cache->space_info = space_info;
    9124             :                 spin_lock(&cache->space_info->lock);
    9125        1141 :                 cache->space_info->bytes_readonly += cache->bytes_super;
    9126        1141 :                 spin_unlock(&cache->space_info->lock);
    9127             : 
    9128        1141 :                 __link_block_group(space_info, cache);
    9129             : 
    9130        1141 :                 set_avail_alloc_bits(root->fs_info, cache->flags);
    9131        1141 :                 if (btrfs_chunk_readonly(root, cache->key.objectid))
    9132           0 :                         set_block_group_ro(cache, 1);
    9133             :         }
    9134             : 
    9135         878 :         list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) {
    9136         657 :                 if (!(get_alloc_profile(root, space_info->flags) &
    9137             :                       (BTRFS_BLOCK_GROUP_RAID10 |
    9138             :                        BTRFS_BLOCK_GROUP_RAID1 |
    9139             :                        BTRFS_BLOCK_GROUP_RAID5 |
    9140             :                        BTRFS_BLOCK_GROUP_RAID6 |
    9141             :                        BTRFS_BLOCK_GROUP_DUP)))
    9142         227 :                         continue;
    9143             :                 /*
    9144             :                  * avoid allocating from un-mirrored block group if there are
    9145             :                  * mirrored block groups.
    9146             :                  */
    9147         430 :                 list_for_each_entry(cache,
    9148             :                                 &space_info->block_groups[BTRFS_RAID_RAID0],
    9149             :                                 list)
    9150           0 :                         set_block_group_ro(cache, 1);
    9151         852 :                 list_for_each_entry(cache,
    9152             :                                 &space_info->block_groups[BTRFS_RAID_SINGLE],
    9153             :                                 list)
    9154         422 :                         set_block_group_ro(cache, 1);
    9155             :         }
    9156             : 
    9157         221 :         init_global_block_rsv(info);
    9158             :         ret = 0;
    9159             : error:
    9160         221 :         btrfs_free_path(path);
    9161         221 :         return ret;
    9162             : }
    9163             : 
    9164          87 : void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
    9165             :                                        struct btrfs_root *root)
    9166             : {
    9167             :         struct btrfs_block_group_cache *block_group, *tmp;
    9168          87 :         struct btrfs_root *extent_root = root->fs_info->extent_root;
    9169             :         struct btrfs_block_group_item item;
    9170             :         struct btrfs_key key;
    9171             :         int ret = 0;
    9172             : 
    9173         174 :         list_for_each_entry_safe(block_group, tmp, &trans->new_bgs,
    9174             :                                  new_bg_list) {
    9175             :                 list_del_init(&block_group->new_bg_list);
    9176             : 
    9177          87 :                 if (ret)
    9178           0 :                         continue;
    9179             : 
    9180             :                 spin_lock(&block_group->lock);
    9181          87 :                 memcpy(&item, &block_group->item, sizeof(item));
    9182          87 :                 memcpy(&key, &block_group->key, sizeof(key));
    9183             :                 spin_unlock(&block_group->lock);
    9184             : 
    9185          87 :                 ret = btrfs_insert_item(trans, extent_root, &key, &item,
    9186             :                                         sizeof(item));
    9187          87 :                 if (ret)
    9188           0 :                         btrfs_abort_transaction(trans, extent_root, ret);
    9189          87 :                 ret = btrfs_finish_chunk_alloc(trans, extent_root,
    9190             :                                                key.objectid, key.offset);
    9191          87 :                 if (ret)
    9192           0 :                         btrfs_abort_transaction(trans, extent_root, ret);
    9193             :         }
    9194          87 : }
    9195             : 
    9196         174 : int btrfs_make_block_group(struct btrfs_trans_handle *trans,
    9197             :                            struct btrfs_root *root, u64 bytes_used,
    9198             :                            u64 type, u64 chunk_objectid, u64 chunk_offset,
    9199             :                            u64 size)
    9200             : {
    9201             :         int ret;
    9202             :         struct btrfs_root *extent_root;
    9203             :         struct btrfs_block_group_cache *cache;
    9204             : 
    9205          87 :         extent_root = root->fs_info->extent_root;
    9206             : 
    9207             :         btrfs_set_log_full_commit(root->fs_info, trans);
    9208             : 
    9209          87 :         cache = btrfs_create_block_group_cache(root, chunk_offset, size);
    9210          87 :         if (!cache)
    9211             :                 return -ENOMEM;
    9212             : 
    9213             :         btrfs_set_block_group_used(&cache->item, bytes_used);
    9214             :         btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
    9215             :         btrfs_set_block_group_flags(&cache->item, type);
    9216             : 
    9217          87 :         cache->flags = type;
    9218          87 :         cache->last_byte_to_unpin = (u64)-1;
    9219          87 :         cache->cached = BTRFS_CACHE_FINISHED;
    9220          87 :         ret = exclude_super_stripes(root, cache);
    9221          87 :         if (ret) {
    9222             :                 /*
    9223             :                  * We may have excluded something, so call this just in
    9224             :                  * case.
    9225             :                  */
    9226           0 :                 free_excluded_extents(root, cache);
    9227           0 :                 btrfs_put_block_group(cache);
    9228           0 :                 return ret;
    9229             :         }
    9230             : 
    9231          87 :         add_new_free_space(cache, root->fs_info, chunk_offset,
    9232             :                            chunk_offset + size);
    9233             : 
    9234          87 :         free_excluded_extents(root, cache);
    9235             : 
    9236          87 :         ret = btrfs_add_block_group_cache(root->fs_info, cache);
    9237          87 :         if (ret) {
    9238           0 :                 btrfs_remove_free_space_cache(cache);
    9239           0 :                 btrfs_put_block_group(cache);
    9240           0 :                 return ret;
    9241             :         }
    9242             : 
    9243          87 :         ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
    9244             :                                 &cache->space_info);
    9245          87 :         if (ret) {
    9246           0 :                 btrfs_remove_free_space_cache(cache);
    9247           0 :                 spin_lock(&root->fs_info->block_group_cache_lock);
    9248           0 :                 rb_erase(&cache->cache_node,
    9249           0 :                          &root->fs_info->block_group_cache_tree);
    9250           0 :                 spin_unlock(&root->fs_info->block_group_cache_lock);
    9251           0 :                 btrfs_put_block_group(cache);
    9252           0 :                 return ret;
    9253             :         }
    9254          87 :         update_global_block_rsv(root->fs_info);
    9255             : 
    9256          87 :         spin_lock(&cache->space_info->lock);
    9257          87 :         cache->space_info->bytes_readonly += cache->bytes_super;
    9258          87 :         spin_unlock(&cache->space_info->lock);
    9259             : 
    9260          87 :         __link_block_group(cache->space_info, cache);
    9261             : 
    9262          87 :         list_add_tail(&cache->new_bg_list, &trans->new_bgs);
    9263             : 
    9264          87 :         set_avail_alloc_bits(extent_root->fs_info, type);
    9265             : 
    9266          87 :         return 0;
    9267             : }
    9268             : 
    9269           6 : static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
    9270             : {
    9271           6 :         u64 extra_flags = chunk_to_extended(flags) &
    9272             :                                 BTRFS_EXTENDED_PROFILE_MASK;
    9273             : 
    9274             :         write_seqlock(&fs_info->profiles_lock);
    9275           6 :         if (flags & BTRFS_BLOCK_GROUP_DATA)
    9276           0 :                 fs_info->avail_data_alloc_bits &= ~extra_flags;
    9277           6 :         if (flags & BTRFS_BLOCK_GROUP_METADATA)
    9278           3 :                 fs_info->avail_metadata_alloc_bits &= ~extra_flags;
    9279           6 :         if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
    9280           3 :                 fs_info->avail_system_alloc_bits &= ~extra_flags;
    9281             :         write_sequnlock(&fs_info->profiles_lock);
    9282           6 : }
    9283             : 
    9284          72 : int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
    9285             :                              struct btrfs_root *root, u64 group_start)
    9286             : {
    9287             :         struct btrfs_path *path;
    9288             :         struct btrfs_block_group_cache *block_group;
    9289             :         struct btrfs_free_cluster *cluster;
    9290          72 :         struct btrfs_root *tree_root = root->fs_info->tree_root;
    9291             :         struct btrfs_key key;
    9292             :         struct inode *inode;
    9293             :         struct kobject *kobj = NULL;
    9294             :         int ret;
    9295             :         int index;
    9296             :         int factor;
    9297             : 
    9298          72 :         root = root->fs_info->extent_root;
    9299             : 
    9300          72 :         block_group = btrfs_lookup_block_group(root->fs_info, group_start);
    9301          72 :         BUG_ON(!block_group);
    9302          72 :         BUG_ON(!block_group->ro);
    9303             : 
    9304             :         /*
    9305             :          * Free the reserved super bytes from this block group before
    9306             :          * remove it.
    9307             :          */
    9308          72 :         free_excluded_extents(root, block_group);
    9309             : 
    9310          72 :         memcpy(&key, &block_group->key, sizeof(key));
    9311             :         index = get_block_group_index(block_group);
    9312          72 :         if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP |
    9313             :                                   BTRFS_BLOCK_GROUP_RAID1 |
    9314             :                                   BTRFS_BLOCK_GROUP_RAID10))
    9315             :                 factor = 2;
    9316             :         else
    9317             :                 factor = 1;
    9318             : 
    9319             :         /* make sure this block group isn't part of an allocation cluster */
    9320          72 :         cluster = &root->fs_info->data_alloc_cluster;
    9321             :         spin_lock(&cluster->refill_lock);
    9322          72 :         btrfs_return_cluster_to_free_space(block_group, cluster);
    9323             :         spin_unlock(&cluster->refill_lock);
    9324             : 
    9325             :         /*
    9326             :          * make sure this block group isn't part of a metadata
    9327             :          * allocation cluster
    9328             :          */
    9329          72 :         cluster = &root->fs_info->meta_alloc_cluster;
    9330             :         spin_lock(&cluster->refill_lock);
    9331          72 :         btrfs_return_cluster_to_free_space(block_group, cluster);
    9332             :         spin_unlock(&cluster->refill_lock);
    9333             : 
    9334          72 :         path = btrfs_alloc_path();
    9335          72 :         if (!path) {
    9336             :                 ret = -ENOMEM;
    9337             :                 goto out;
    9338             :         }
    9339             : 
    9340          72 :         inode = lookup_free_space_inode(tree_root, block_group, path);
    9341          72 :         if (!IS_ERR(inode)) {
    9342          41 :                 ret = btrfs_orphan_add(trans, inode);
    9343          41 :                 if (ret) {
    9344           0 :                         btrfs_add_delayed_iput(inode);
    9345           0 :                         goto out;
    9346             :                 }
    9347          41 :                 clear_nlink(inode);
    9348             :                 /* One for the block groups ref */
    9349             :                 spin_lock(&block_group->lock);
    9350          41 :                 if (block_group->iref) {
    9351          41 :                         block_group->iref = 0;
    9352          41 :                         block_group->inode = NULL;
    9353             :                         spin_unlock(&block_group->lock);
    9354          41 :                         iput(inode);
    9355             :                 } else {
    9356             :                         spin_unlock(&block_group->lock);
    9357             :                 }
    9358             :                 /* One for our lookup ref */
    9359          41 :                 btrfs_add_delayed_iput(inode);
    9360             :         }
    9361             : 
    9362          72 :         key.objectid = BTRFS_FREE_SPACE_OBJECTID;
    9363          72 :         key.offset = block_group->key.objectid;
    9364          72 :         key.type = 0;
    9365             : 
    9366          72 :         ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
    9367          72 :         if (ret < 0)
    9368             :                 goto out;
    9369          72 :         if (ret > 0)
    9370          31 :                 btrfs_release_path(path);
    9371          72 :         if (ret == 0) {
    9372             :                 ret = btrfs_del_item(trans, tree_root, path);
    9373          41 :                 if (ret)
    9374             :                         goto out;
    9375          41 :                 btrfs_release_path(path);
    9376             :         }
    9377             : 
    9378          72 :         spin_lock(&root->fs_info->block_group_cache_lock);
    9379          72 :         rb_erase(&block_group->cache_node,
    9380          72 :                  &root->fs_info->block_group_cache_tree);
    9381             : 
    9382          72 :         if (root->fs_info->first_logical_byte == block_group->key.objectid)
    9383          22 :                 root->fs_info->first_logical_byte = (u64)-1;
    9384          72 :         spin_unlock(&root->fs_info->block_group_cache_lock);
    9385             : 
    9386          72 :         down_write(&block_group->space_info->groups_sem);
    9387             :         /*
    9388             :          * we must use list_del_init so people can check to see if they
    9389             :          * are still on the list after taking the semaphore
    9390             :          */
    9391          72 :         list_del_init(&block_group->list);
    9392         144 :         if (list_empty(&block_group->space_info->block_groups[index])) {
    9393           6 :                 kobj = block_group->space_info->block_group_kobjs[index];
    9394           6 :                 block_group->space_info->block_group_kobjs[index] = NULL;
    9395           6 :                 clear_avail_alloc_bits(root->fs_info, block_group->flags);
    9396             :         }
    9397          72 :         up_write(&block_group->space_info->groups_sem);
    9398          72 :         if (kobj) {
    9399           6 :                 kobject_del(kobj);
    9400           6 :                 kobject_put(kobj);
    9401             :         }
    9402             : 
    9403          72 :         if (block_group->cached == BTRFS_CACHE_STARTED)
    9404           0 :                 wait_block_group_cache_done(block_group);
    9405             : 
    9406          72 :         btrfs_remove_free_space_cache(block_group);
    9407             : 
    9408          72 :         spin_lock(&block_group->space_info->lock);
    9409          72 :         block_group->space_info->total_bytes -= block_group->key.offset;
    9410          72 :         block_group->space_info->bytes_readonly -= block_group->key.offset;
    9411          72 :         block_group->space_info->disk_total -= block_group->key.offset * factor;
    9412          72 :         spin_unlock(&block_group->space_info->lock);
    9413             : 
    9414          72 :         memcpy(&key, &block_group->key, sizeof(key));
    9415             : 
    9416          72 :         btrfs_clear_space_info_full(root->fs_info);
    9417             : 
    9418          72 :         btrfs_put_block_group(block_group);
    9419          72 :         btrfs_put_block_group(block_group);
    9420             : 
    9421          72 :         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
    9422          72 :         if (ret > 0)
    9423             :                 ret = -EIO;
    9424          72 :         if (ret < 0)
    9425             :                 goto out;
    9426             : 
    9427             :         ret = btrfs_del_item(trans, root, path);
    9428             : out:
    9429          72 :         btrfs_free_path(path);
    9430          72 :         return ret;
    9431             : }
    9432             : 
    9433         221 : int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
    9434             : {
                               /*
                                * Set up the initial space_info entries for this filesystem by
                                * calling update_space_info() with zero byte counts, first for
                                * SYSTEM and then for either one combined METADATA|DATA entry
                                * (mixed block groups) or separate METADATA and DATA entries.
                                *
                                * Returns 1 when the super block copy has no root set, otherwise
                                * the result of the last update_space_info() call that ran (a
                                * non-zero result stops the sequence early).
                                */
    9435             :         struct btrfs_space_info *space_info;
    9436             :         struct btrfs_super_block *disk_super;
    9437             :         u64 features;
    9438             :         u64 flags;
    9439             :         int mixed = 0;
    9440             :         int ret;
    9441             : 
    9442         221 :         disk_super = fs_info->super_copy;
                               /* No root recorded in the super block copy: bail out with 1. */
    9443         221 :         if (!btrfs_super_root(disk_super))
    9444             :                 return 1;
    9445             : 
                               /* The MIXED_GROUPS incompat bit selects the combined data+metadata layout. */
    9446             :         features = btrfs_super_incompat_flags(disk_super);
    9447         221 :         if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
    9448             :                 mixed = 1;
    9449             : 
                               /*
                                * space_info only receives the out-parameter of each call below;
                                * this function never reads it back.
                                */
    9450             :         flags = BTRFS_BLOCK_GROUP_SYSTEM;
    9451         221 :         ret = update_space_info(fs_info, flags, 0, 0, &space_info);
    9452         221 :         if (ret)
    9453             :                 goto out;
    9454             : 
    9455         221 :         if (mixed) {
    9456             :                 flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
    9457           6 :                 ret = update_space_info(fs_info, flags, 0, 0, &space_info);
    9458             :         } else {
    9459             :                 flags = BTRFS_BLOCK_GROUP_METADATA;
    9460         215 :                 ret = update_space_info(fs_info, flags, 0, 0, &space_info);
    9461         215 :                 if (ret)
    9462             :                         goto out;
    9463             : 
    9464             :                 flags = BTRFS_BLOCK_GROUP_DATA;
    9465         215 :                 ret = update_space_info(fs_info, flags, 0, 0, &space_info);
    9466             :         }
    9467             : out:
    9468         221 :         return ret;
    9469             : }
    9470             : 
    9471           0 : int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
    9472             : {
                               /*
                                * Thin wrapper: forwards all arguments to unpin_extent_range()
                                * and returns its result unchanged.
                                * NOTE(review): presumably exists so error-handling code outside
                                * this file can reach unpin_extent_range() - confirm.
                                */
    9473           0 :         return unpin_extent_range(root, start, end);
    9474             : }
    9475             : 
    9476           0 : int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
    9477             :                                u64 num_bytes, u64 *actual_bytes)
    9478             : {
                               /*
                                * Thin wrapper: forwards all arguments to btrfs_discard_extent()
                                * and returns its result unchanged. actual_bytes is passed
                                * straight through as the callee's out-parameter.
                                */
    9479           0 :         return btrfs_discard_extent(root, bytenr, num_bytes, actual_bytes);
    9480             : }
    9481             : 
    9482           0 : int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
    9483             : {
                               /*
                                * Trim the block groups that intersect the byte range
                                * [range->start, range->start + range->len), skipping any
                                * intersection shorter than range->minlen.
                                *
                                * On return range->len is overwritten with the sum of the values
                                * btrfs_trim_block_group() reported through group_trimmed; this
                                * happens on the error paths as well. Returns 0, or the first
                                * non-zero result from cache_block_group(),
                                * wait_block_group_cache_done() or btrfs_trim_block_group().
                                */
    9484           0 :         struct btrfs_fs_info *fs_info = root->fs_info;
    9485             :         struct btrfs_block_group_cache *cache = NULL;
    9486             :         u64 group_trimmed;
    9487             :         u64 start;
    9488             :         u64 end;
    9489             :         u64 trimmed = 0;
    9490           0 :         u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
    9491             :         int ret = 0;
    9492             : 
    9493             :         /*
    9494             :          * When the request covers the whole filesystem (len equals the super
    9495             :          * block's total_bytes), use btrfs_lookup_first_block_group() rather than
                     :          * btrfs_lookup_block_group(), because our block groups may start from a
                     :          * non-zero offset.
                     :          */
    9496           0 :         if (range->len == total_bytes)
    9497           0 :                 cache = btrfs_lookup_first_block_group(fs_info, range->start);
    9498             :         else
    9499           0 :                 cache = btrfs_lookup_block_group(fs_info, range->start);
    9500             : 
    9501           0 :         while (cache) {
                                       /*
                                        * This group begins at or past the end of the requested
                                        * range: release it and stop.
                                        * NOTE(review): range->start + range->len is a plain u64
                                        * addition with no overflow check - confirm callers bound it.
                                        */
    9502           0 :                 if (cache->key.objectid >= (range->start + range->len)) {
    9503           0 :                         btrfs_put_block_group(cache);
    9504           0 :                         break;
    9505             :                 }
    9506             : 
                                       /*
                                        * Clamp the request to this block group; key.objectid is
                                        * used as the group's start and key.offset as its length.
                                        */
    9507           0 :                 start = max(range->start, cache->key.objectid);
    9508           0 :                 end = min(range->start + range->len,
    9509             :                                 cache->key.objectid + cache->key.offset);
    9510             : 
    9511           0 :                 if (end - start >= range->minlen) {
                                               /*
                                                * Start caching for this group if it is not done
                                                * yet and wait for it to finish before trimming.
                                                * Either step failing releases the group and ends
                                                * the walk with that error.
                                                */
    9512           0 :                         if (!block_group_cache_done(cache)) {
    9513           0 :                                 ret = cache_block_group(cache, 0);
    9514           0 :                                 if (ret) {
    9515           0 :                                         btrfs_put_block_group(cache);
    9516           0 :                                         break;
    9517             :                                 }
    9518           0 :                                 ret = wait_block_group_cache_done(cache);
    9519           0 :                                 if (ret) {
    9520           0 :                                         btrfs_put_block_group(cache);
    9521           0 :                                         break;
    9522             :                                 }
    9523             :                         }
    9524           0 :                         ret = btrfs_trim_block_group(cache,
    9525             :                                                      &group_trimmed,
    9526             :                                                      start,
    9527             :                                                      end,
    9528             :                                                      range->minlen);
    9529             : 
                                               /*
                                                * NOTE(review): group_trimmed is accumulated before
                                                * ret is checked, so this relies on
                                                * btrfs_trim_block_group() always storing to it,
                                                * even on failure - confirm.
                                                */
    9530           0 :                         trimmed += group_trimmed;
    9531           0 :                         if (ret) {
    9532           0 :                                 btrfs_put_block_group(cache);
    9533           0 :                                 break;
    9534             :                         }
    9535             :                 }
    9536             : 
                                       /*
                                        * Advance to the following block group.
                                        * NOTE(review): presumably next_block_group() releases the
                                        * current group and returns the next one (or NULL at the
                                        * end), which is why no put is done here - confirm.
                                        */
    9537           0 :                 cache = next_block_group(fs_info->tree_root, cache);
    9538             :         }
    9539             : 
                               /* Report the accumulated total back through the caller's range. */
    9540           0 :         range->len = trimmed;
    9541           0 :         return ret;
    9542             : }
    9543             : 
    9544             : /*
    9545             :  * btrfs_{start,end}_nocow_write() are similar to mnt_{want,drop}_write().
    9546             :  * They are used to prevent tasks from writing data into the page cache
    9547             :  * by nocow before the subvolume is snapshotted and then flushing that data
    9548             :  * to disk after the snapshot creation.
    9549             :  */
    9550        1650 : void btrfs_end_nocow_write(struct btrfs_root *root)
    9551             : {
                               /*
                                * Decrement root->subv_writers->counter, the counter that
                                * btrfs_start_nocow_write() increments, then wake anyone waiting
                                * on root->subv_writers->wait.
                                */
    9552        1650 :         percpu_counter_dec(&root->subv_writers->counter);
    9553             :         /*
    9554             :          * Make sure the counter decrement is visible before we check
    9555             :          * for, and wake up, waiters on subv_writers->wait.
    9556             :          */
    9557        1650 :         smp_mb();
    9558        3300 :         if (waitqueue_active(&root->subv_writers->wait))
    9559           0 :                 wake_up(&root->subv_writers->wait);
    9560        1650 : }
    9561             : 
    9562        2597 : int btrfs_start_nocow_write(struct btrfs_root *root)
    9563             : {
                               /*
                                * Returns 1 after incrementing root->subv_writers->counter, or 0
                                * with the counter left as it was when root->will_be_snapshoted
                                * is non-zero.
                                * NOTE(review): presumably a caller that gets 1 must later call
                                * btrfs_end_nocow_write() to drop the count - confirm at call sites.
                                */
                               /* Fast path: flag already set, do not touch the counter at all. */
    9564        2597 :         if (unlikely(atomic_read(&root->will_be_snapshoted)))
    9565             :                 return 0;
    9566             : 
    9567        1650 :         percpu_counter_inc(&root->subv_writers->counter);
    9568             :         /*
    9569             :          * Make sure counter is updated before we check for snapshot creation.
    9570             :          */
    9571        1650 :         smp_mb();
    9572        1650 :         if (unlikely(atomic_read(&root->will_be_snapshoted))) {
                                       /* Flag is set on the re-check: undo our increment and refuse. */
    9573           0 :                 btrfs_end_nocow_write(root);
    9574           0 :                 return 0;
    9575             :         }
    9576             :         return 1;
    9577             : }

Generated by: LCOV version 1.10