LCOV - code coverage report
Current view: top level - fs/btrfs - transaction.c (source / functions)
Test: btrfstest.info
Date: 2014-11-28
                     Hit    Total    Coverage
Lines:               647      831      77.9 %
Functions:            38       45      84.4 %

          Line data    Source code
       1             : /*
       2             :  * Copyright (C) 2007 Oracle.  All rights reserved.
       3             :  *
       4             :  * This program is free software; you can redistribute it and/or
       5             :  * modify it under the terms of the GNU General Public
       6             :  * License v2 as published by the Free Software Foundation.
       7             :  *
       8             :  * This program is distributed in the hope that it will be useful,
       9             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      10             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      11             :  * General Public License for more details.
      12             :  *
      13             :  * You should have received a copy of the GNU General Public
      14             :  * License along with this program; if not, write to the
      15             :  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
       16             :  * Boston, MA 02111-1307, USA.
      17             :  */
      18             : 
      19             : #include <linux/fs.h>
      20             : #include <linux/slab.h>
      21             : #include <linux/sched.h>
      22             : #include <linux/writeback.h>
      23             : #include <linux/pagemap.h>
      24             : #include <linux/blkdev.h>
      25             : #include <linux/uuid.h>
      26             : #include "ctree.h"
      27             : #include "disk-io.h"
      28             : #include "transaction.h"
      29             : #include "locking.h"
      30             : #include "tree-log.h"
      31             : #include "inode-map.h"
      32             : #include "volumes.h"
      33             : #include "dev-replace.h"
      34             : #include "qgroup.h"
      35             : 
      36             : #define BTRFS_ROOT_TRANS_TAG 0
      37             : 
      38             : static unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = {
      39             :         [TRANS_STATE_RUNNING]           = 0U,
      40             :         [TRANS_STATE_BLOCKED]           = (__TRANS_USERSPACE |
      41             :                                            __TRANS_START),
      42             :         [TRANS_STATE_COMMIT_START]      = (__TRANS_USERSPACE |
      43             :                                            __TRANS_START |
      44             :                                            __TRANS_ATTACH),
      45             :         [TRANS_STATE_COMMIT_DOING]      = (__TRANS_USERSPACE |
      46             :                                            __TRANS_START |
      47             :                                            __TRANS_ATTACH |
      48             :                                            __TRANS_JOIN),
      49             :         [TRANS_STATE_UNBLOCKED]         = (__TRANS_USERSPACE |
      50             :                                            __TRANS_START |
      51             :                                            __TRANS_ATTACH |
      52             :                                            __TRANS_JOIN |
      53             :                                            __TRANS_JOIN_NOLOCK),
      54             :         [TRANS_STATE_COMPLETED]         = (__TRANS_USERSPACE |
      55             :                                            __TRANS_START |
      56             :                                            __TRANS_ATTACH |
      57             :                                            __TRANS_JOIN |
      58             :                                            __TRANS_JOIN_NOLOCK),
      59             : };
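
/*
 * Editor's note (illustrative, not part of the coverage data): the table
 * above is what join_transaction() below consults, roughly as
 *
 *         if (btrfs_blocked_trans_types[cur_trans->state] & type)
 *                 return -EBUSY;
 *
 * Assuming TRANS_JOIN carries __TRANS_JOIN and TRANS_JOIN_NOLOCK carries
 * __TRANS_JOIN_NOLOCK (the exact bit layout lives in transaction.h), a plain
 * join is therefore refused once the commit reaches TRANS_STATE_COMMIT_DOING,
 * while a nolock join is still admitted until TRANS_STATE_UNBLOCKED.
 */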
      60             : 
      61      183480 : void btrfs_put_transaction(struct btrfs_transaction *transaction)
      62             : {
      63      183480 :         WARN_ON(atomic_read(&transaction->use_count) == 0);
      64      366968 :         if (atomic_dec_and_test(&transaction->use_count)) {
      65        4196 :                 BUG_ON(!list_empty(&transaction->list));
      66        2098 :                 WARN_ON(!RB_EMPTY_ROOT(&transaction->delayed_refs.href_root));
      67        4370 :                 while (!list_empty(&transaction->pending_chunks)) {
      68             :                         struct extent_map *em;
      69             : 
      70          87 :                         em = list_first_entry(&transaction->pending_chunks,
      71             :                                               struct extent_map, list);
      72          87 :                         list_del_init(&em->list);
      73          87 :                         free_extent_map(em);
      74             :                 }
      75        2098 :                 kmem_cache_free(btrfs_transaction_cachep, transaction);
      76             :         }
      77      183488 : }
      78             : 
      79        2098 : static noinline void switch_commit_roots(struct btrfs_transaction *trans,
      80             :                                          struct btrfs_fs_info *fs_info)
      81             : {
      82             :         struct btrfs_root *root, *tmp;
      83             : 
      84        2098 :         down_write(&fs_info->commit_root_sem);
      85       13123 :         list_for_each_entry_safe(root, tmp, &trans->switch_commits,
      86             :                                  dirty_list) {
      87             :                 list_del_init(&root->dirty_list);
      88       11025 :                 free_extent_buffer(root->commit_root);
      89       11025 :                 root->commit_root = btrfs_root_node(root);
      90       22050 :                 if (is_fstree(root->objectid))
      91        1888 :                         btrfs_unpin_free_ino(root);
      92             :         }
      93        2098 :         up_write(&fs_info->commit_root_sem);
      94        2098 : }
      95             : 
      96             : static inline void extwriter_counter_inc(struct btrfs_transaction *trans,
      97             :                                          unsigned int type)
      98             : {
      99      178891 :         if (type & TRANS_EXTWRITERS)
     100       57513 :                 atomic_inc(&trans->num_extwriters);
     101             : }
     102             : 
     103             : static inline void extwriter_counter_dec(struct btrfs_transaction *trans,
     104             :                                          unsigned int type)
     105             : {
     106      180987 :         if (type & TRANS_EXTWRITERS)
     107       58496 :                 atomic_dec(&trans->num_extwriters);
     108             : }
     109             : 
     110             : static inline void extwriter_counter_init(struct btrfs_transaction *trans,
     111             :                                           unsigned int type)
     112             : {
     113        2098 :         atomic_set(&trans->num_extwriters, ((type & TRANS_EXTWRITERS) ? 1 : 0));
     114             : }
     115             : 
     116             : static inline int extwriter_counter_read(struct btrfs_transaction *trans)
     117             : {
     118             :         return atomic_read(&trans->num_extwriters);
     119             : }
     120             : 
     121             : /*
     122             :  * either allocate a new transaction or hop into the existing one
     123             :  */
     124      182335 : static noinline int join_transaction(struct btrfs_root *root, unsigned int type)
     125             : {
     126             :         struct btrfs_transaction *cur_trans;
     127      182335 :         struct btrfs_fs_info *fs_info = root->fs_info;
     128             : 
     129             :         spin_lock(&fs_info->trans_lock);
     130             : loop:
     131             :         /* The file system has been taken offline. No new transactions. */
     132      182355 :         if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
     133             :                 spin_unlock(&fs_info->trans_lock);
     134             :                 return -EROFS;
     135             :         }
     136             : 
     137      182355 :         cur_trans = fs_info->running_transaction;
     138      182355 :         if (cur_trans) {
     139      178971 :                 if (cur_trans->aborted) {
     140             :                         spin_unlock(&fs_info->trans_lock);
     141           0 :                         return cur_trans->aborted;
     142             :                 }
     143      178971 :                 if (btrfs_blocked_trans_types[cur_trans->state] & type) {
     144             :                         spin_unlock(&fs_info->trans_lock);
     145             :                         return -EBUSY;
     146             :                 }
     147      178891 :                 atomic_inc(&cur_trans->use_count);
     148      178891 :                 atomic_inc(&cur_trans->num_writers);
     149             :                 extwriter_counter_inc(cur_trans, type);
     150             :                 spin_unlock(&fs_info->trans_lock);
     151             :                 return 0;
     152             :         }
     153             :         spin_unlock(&fs_info->trans_lock);
     154             : 
     155             :         /*
     156             :          * If we are ATTACH, we just want to catch the current transaction,
     157             :          * and commit it. If there is no transaction, just return ENOENT.
     158             :          */
     159        3384 :         if (type == TRANS_ATTACH)
     160             :                 return -ENOENT;
     161             : 
     162             :         /*
     163             :          * JOIN_NOLOCK only happens during the transaction commit, so
     164             :          * it is impossible that ->running_transaction is NULL
     165             :          */
     166        2101 :         BUG_ON(type == TRANS_JOIN_NOLOCK);
     167             : 
     168        2101 :         cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS);
     169        2101 :         if (!cur_trans)
     170             :                 return -ENOMEM;
     171             : 
     172             :         spin_lock(&fs_info->trans_lock);
     173        2101 :         if (fs_info->running_transaction) {
     174             :                 /*
     175             :                  * someone started a transaction after we unlocked.  Make sure
     176             :                  * to redo the checks above
     177             :                  */
     178           3 :                 kmem_cache_free(btrfs_transaction_cachep, cur_trans);
     179             :                 goto loop;
     180        2098 :         } else if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
     181             :                 spin_unlock(&fs_info->trans_lock);
     182           0 :                 kmem_cache_free(btrfs_transaction_cachep, cur_trans);
     183             :                 return -EROFS;
     184             :         }
     185             : 
     186             :         atomic_set(&cur_trans->num_writers, 1);
     187             :         extwriter_counter_init(cur_trans, type);
     188        2098 :         init_waitqueue_head(&cur_trans->writer_wait);
     189        2098 :         init_waitqueue_head(&cur_trans->commit_wait);
     190        2098 :         cur_trans->state = TRANS_STATE_RUNNING;
     191             :         /*
     192             :          * One for this trans handle, one so it will live on until we
     193             :          * commit the transaction.
     194             :          */
     195             :         atomic_set(&cur_trans->use_count, 2);
     196        2098 :         cur_trans->start_time = get_seconds();
     197             : 
     198        2098 :         cur_trans->delayed_refs.href_root = RB_ROOT;
     199             :         atomic_set(&cur_trans->delayed_refs.num_entries, 0);
     200        2098 :         cur_trans->delayed_refs.num_heads_ready = 0;
     201        2098 :         cur_trans->delayed_refs.num_heads = 0;
     202        2098 :         cur_trans->delayed_refs.flushing = 0;
     203        2098 :         cur_trans->delayed_refs.run_delayed_start = 0;
     204             : 
     205             :         /*
     206             :          * although the tree mod log is per file system and not per transaction,
     207             :          * the log must never go across transaction boundaries.
     208             :          */
     209        2098 :         smp_mb();
     210        4196 :         if (!list_empty(&fs_info->tree_mod_seq_list))
     211           0 :                 WARN(1, KERN_ERR "BTRFS: tree_mod_seq_list not empty when "
     212             :                         "creating a fresh transaction\n");
     213        2098 :         if (!RB_EMPTY_ROOT(&fs_info->tree_mod_log))
     214           0 :                 WARN(1, KERN_ERR "BTRFS: tree_mod_log rb tree not empty when "
     215             :                         "creating a fresh transaction\n");
     216             :         atomic64_set(&fs_info->tree_mod_seq, 0);
     217             : 
     218        2098 :         spin_lock_init(&cur_trans->delayed_refs.lock);
     219             : 
     220        2098 :         INIT_LIST_HEAD(&cur_trans->pending_snapshots);
     221        2098 :         INIT_LIST_HEAD(&cur_trans->pending_chunks);
     222        2098 :         INIT_LIST_HEAD(&cur_trans->switch_commits);
     223        2098 :         list_add_tail(&cur_trans->list, &fs_info->trans_list);
     224        2098 :         extent_io_tree_init(&cur_trans->dirty_pages,
     225        2098 :                              fs_info->btree_inode->i_mapping);
     226        2098 :         fs_info->generation++;
     227        2098 :         cur_trans->transid = fs_info->generation;
     228        2098 :         fs_info->running_transaction = cur_trans;
     229        2098 :         cur_trans->aborted = 0;
     230             :         spin_unlock(&fs_info->trans_lock);
     231             : 
     232             :         return 0;
     233             : }
     234             : 
     235             : /*
     236             :  * this does all the record keeping required to make sure that a reference
     237             :  * counted root is properly recorded in a given transaction.  This is required
     238             :  * to make sure the old root from before we joined the transaction is deleted
     239             :  * when the transaction commits
     240             :  */
     241        2811 : static int record_root_in_trans(struct btrfs_trans_handle *trans,
     242             :                                struct btrfs_root *root)
     243             : {
     244        5622 :         if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
     245        2811 :             root->last_trans < trans->transid) {
     246        2547 :                 WARN_ON(root == root->fs_info->extent_root);
     247        2547 :                 WARN_ON(root->commit_root != root->node);
     248             : 
     249             :                 /*
     250             :                  * see below for IN_TRANS_SETUP usage rules
     251             :                  * we have the reloc mutex held now, so there
     252             :                  * is only one writer in this function
     253             :                  */
     254             :                 set_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state);
     255             : 
     256             :                 /* make sure readers find IN_TRANS_SETUP before
     257             :                  * they find our root->last_trans update
     258             :                  */
     259        2547 :                 smp_wmb();
     260             : 
     261        2547 :                 spin_lock(&root->fs_info->fs_roots_radix_lock);
     262        2547 :                 if (root->last_trans == trans->transid) {
     263           0 :                         spin_unlock(&root->fs_info->fs_roots_radix_lock);
     264           0 :                         return 0;
     265             :                 }
     266        2547 :                 radix_tree_tag_set(&root->fs_info->fs_roots_radix,
     267        2547 :                            (unsigned long)root->root_key.objectid,
     268             :                            BTRFS_ROOT_TRANS_TAG);
     269        2547 :                 spin_unlock(&root->fs_info->fs_roots_radix_lock);
     270        2547 :                 root->last_trans = trans->transid;
     271             : 
     272             :                 /* this is pretty tricky.  We don't want to
     273             :                  * take the relocation lock in btrfs_record_root_in_trans
     274             :                  * unless we're really doing the first setup for this root in
     275             :                  * this transaction.
     276             :                  *
     277             :                  * Normally we'd use root->last_trans as a flag to decide
     278             :                  * if we want to take the expensive mutex.
     279             :                  *
     280             :                  * But, we have to set root->last_trans before we
     281             :                  * init the relocation root, otherwise, we trip over warnings
     282             :                  * in ctree.c.  The solution used here is to flag ourselves
     283             :                  * with root IN_TRANS_SETUP.  When this is 1, we're still
     284             :                  * fixing up the reloc trees and everyone must wait.
     285             :                  *
     286             :                  * When this is zero, they can trust root->last_trans and fly
     287             :                  * through btrfs_record_root_in_trans without having to take the
     288             :                  * lock.  smp_wmb() makes sure that all the writes above are
     289             :                  * done before we pop in the zero below
     290             :                  */
     291        2547 :                 btrfs_init_reloc_root(trans, root);
     292        2547 :                 smp_mb__before_atomic();
     293             :                 clear_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state);
     294             :         }
     295             :         return 0;
     296             : }
     297             : 
     298             : 
     299      186565 : int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
     300             :                                struct btrfs_root *root)
     301             : {
     302      186565 :         if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
     303             :                 return 0;
     304             : 
     305             :         /*
     306             :          * see record_root_in_trans for comments about IN_TRANS_SETUP usage
     307             :          * and barriers
     308             :          */
     309      167863 :         smp_rmb();
     310      333208 :         if (root->last_trans == trans->transid &&
     311             :             !test_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state))
     312             :                 return 0;
     313             : 
     314        2520 :         mutex_lock(&root->fs_info->reloc_mutex);
     315        2519 :         record_root_in_trans(trans, root);
     316        2519 :         mutex_unlock(&root->fs_info->reloc_mutex);
     317             : 
     318        2519 :         return 0;
     319             : }
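
/*
 * Editor's note (summary of the barrier comments above, not part of the
 * coverage data): the smp_wmb() in record_root_in_trans() publishes
 * BTRFS_ROOT_IN_TRANS_SETUP before the root->last_trans update, and the
 * smp_rmb() in btrfs_record_root_in_trans() pairs with it.  A caller that
 * observes the new last_trans therefore either still sees the setup bit and
 * falls back to the reloc_mutex path, or sees the bit cleared only after the
 * relocation root has been initialized.
 */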
     320             : 
     321             : static inline int is_transaction_blocked(struct btrfs_transaction *trans)
     322             : {
     323       57886 :         return (trans->state >= TRANS_STATE_BLOCKED &&
     324       58054 :                 trans->state < TRANS_STATE_UNBLOCKED &&
     325         168 :                 !trans->aborted);
     326             : }
     327             : 
     328             : /* wait for commit against the current transaction to become unblocked
     329             :  * when this is done, it is safe to start a new transaction, but the current
     330             :  * transaction might not be fully on disk.
     331             :  */
     332       58754 : static void wait_current_trans(struct btrfs_root *root)
     333             : {
     334       57838 :         struct btrfs_transaction *cur_trans;
     335             : 
     336       58754 :         spin_lock(&root->fs_info->trans_lock);
     337       58755 :         cur_trans = root->fs_info->running_transaction;
     338      116593 :         if (cur_trans && is_transaction_blocked(cur_trans)) {
     339         167 :                 atomic_inc(&cur_trans->use_count);
     340         167 :                 spin_unlock(&root->fs_info->trans_lock);
     341             : 
     342         334 :                 wait_event(root->fs_info->transaction_wait,
     343             :                            cur_trans->state >= TRANS_STATE_UNBLOCKED ||
     344             :                            cur_trans->aborted);
     345         167 :                 btrfs_put_transaction(cur_trans);
     346             :         } else {
     347             :                 spin_unlock(&root->fs_info->trans_lock);
     348             :         }
     349       58755 : }
     350             : 
     351             : static int may_wait_transaction(struct btrfs_root *root, int type)
     352             : {
     353      186619 :         if (root->fs_info->log_root_recovering)
     354             :                 return 0;
     355             : 
     356      186620 :         if (type == TRANS_USERSPACE)
     357             :                 return 1;
     358             : 
     359      244018 :         if (type == TRANS_START &&
     360             :             !atomic_read(&root->fs_info->open_ioctl_trans))
     361             :                 return 1;
     362             : 
     363             :         return 0;
     364             : }
     365             : 
     366             : static inline bool need_reserve_reloc_root(struct btrfs_root *root)
     367             : {
     368       52844 :         if (!root->fs_info->reloc_ctl ||
     369        1021 :             !test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
     370        2042 :             root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
     371        1021 :             root->reloc_root)
     372             :                 return false;
     373             : 
     374             :         return true;
     375             : }
     376             : 
     377             : static struct btrfs_trans_handle *
     378      608160 : start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type,
     379             :                   enum btrfs_reserve_flush_enum flush)
     380             : {
     381             :         struct btrfs_trans_handle *h;
     382             :         struct btrfs_transaction *cur_trans;
     383             :         u64 num_bytes = 0;
     384             :         u64 qgroup_reserved = 0;
     385             :         bool reloc_reserved = false;
     386             :         int ret;
     387             : 
     388             :         /* Send isn't supposed to start transactions. */
     389             :         ASSERT(current->journal_info != (void *)BTRFS_SEND_TRANS_STUB);
     390             : 
     391      374758 :         if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
     392             :                 return ERR_PTR(-EROFS);
     393             : 
     394      187383 :         if (current->journal_info) {
     395        5090 :                 WARN_ON(type & TRANS_EXTWRITERS);
     396        5090 :                 h = current->journal_info;
     397        5090 :                 h->use_count++;
     398        5090 :                 WARN_ON(h->use_count > 2);
     399        5090 :                 h->orig_rsv = h->block_rsv;
     400        5090 :                 h->block_rsv = NULL;
     401        5090 :                 goto got_it;
     402             :         }
     403             : 
     404             :         /*
     405             :          * Do the reservation before we join the transaction so we can do all
     406             :          * the appropriate flushing if need be.
     407             :          */
     408      182293 :         if (num_items > 0 && root != root->fs_info->chunk_root) {
     409       57905 :                 if (root->fs_info->quota_enabled &&
     410        6082 :                     is_fstree(root->root_key.objectid)) {
     411        6082 :                         qgroup_reserved = num_items * root->leafsize;
     412        6082 :                         ret = btrfs_qgroup_reserve(root, qgroup_reserved);
     413        6082 :                         if (ret)
     414           0 :                                 return ERR_PTR(ret);
     415             :                 }
     416             : 
     417       51823 :                 num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
     418             :                 /*
     419             :                  * Do the reservation for the relocation root creation
     420             :                  */
     421       51823 :                 if (unlikely(need_reserve_reloc_root(root))) {
     422           2 :                         num_bytes += root->nodesize;
     423             :                         reloc_reserved = true;
     424             :                 }
     425             : 
     426       51823 :                 ret = btrfs_block_rsv_add(root,
     427             :                                           &root->fs_info->trans_block_rsv,
     428             :                                           num_bytes, flush);
     429       51824 :                 if (ret)
     430             :                         goto reserve_fail;
     431             :         }
     432             : again:
     433      182287 :         h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
     434      182279 :         if (!h) {
     435             :                 ret = -ENOMEM;
     436             :                 goto alloc_fail;
     437             :         }
     438             : 
     439             :         /*
     440             :          * If we are JOIN_NOLOCK we're already committing a transaction and
     441             :          * waiting on this guy, so we don't need to do the sb_start_intwrite
     442             :          * because we're already holding a ref.  We need this because we could
      443             :          * have raced in and done an fsync() on a file which can kick a commit
     444             :          * and then we deadlock with somebody doing a freeze.
     445             :          *
     446             :          * If we are ATTACH, it means we just want to catch the current
     447             :          * transaction and commit it, so we needn't do sb_start_intwrite(). 
     448             :          */
     449      182279 :         if (type & __TRANS_FREEZABLE)
     450      175806 :                 sb_start_intwrite(root->fs_info->sb);
     451             : 
     452      364588 :         if (may_wait_transaction(root, type))
     453       57398 :                 wait_current_trans(root);
     454             : 
     455             :         do {
     456      182339 :                 ret = join_transaction(root, type);
     457      182349 :                 if (ret == -EBUSY) {
     458          80 :                         wait_current_trans(root);
     459          80 :                         if (unlikely(type == TRANS_ATTACH))
     460             :                                 ret = -ENOENT;
     461             :                 }
     462      182349 :         } while (ret == -EBUSY);
     463             : 
     464      182305 :         if (ret < 0) {
     465             :                 /* We must get the transaction if we are JOIN_NOLOCK. */
     466        1319 :                 BUG_ON(type == TRANS_JOIN_NOLOCK);
     467             :                 goto join_fail;
     468             :         }
     469             : 
     470      180986 :         cur_trans = root->fs_info->running_transaction;
     471             : 
     472      180986 :         h->transid = cur_trans->transid;
     473      180986 :         h->transaction = cur_trans;
     474      180986 :         h->blocks_used = 0;
     475      180986 :         h->bytes_reserved = 0;
     476      180986 :         h->root = root;
     477      180986 :         h->delayed_ref_updates = 0;
     478      180986 :         h->use_count = 1;
     479      180986 :         h->adding_csums = 0;
     480      180986 :         h->block_rsv = NULL;
     481      180986 :         h->orig_rsv = NULL;
     482      180986 :         h->aborted = 0;
     483      180986 :         h->qgroup_reserved = 0;
     484      180986 :         h->delayed_ref_elem.seq = 0;
     485      180986 :         h->type = type;
     486      180986 :         h->allocating_chunk = false;
     487      180986 :         h->reloc_reserved = false;
     488      180986 :         h->sync = false;
     489      180986 :         INIT_LIST_HEAD(&h->qgroup_ref_list);
     490      180986 :         INIT_LIST_HEAD(&h->new_bgs);
     491             : 
     492      180986 :         smp_mb();
     493      185306 :         if (cur_trans->state >= TRANS_STATE_BLOCKED &&
     494             :             may_wait_transaction(root, type)) {
     495           0 :                 current->journal_info = h;
     496           0 :                 btrfs_commit_transaction(h, root);
     497           0 :                 goto again;
     498             :         }
     499             : 
     500      180981 :         if (num_bytes) {
     501       51824 :                 trace_btrfs_space_reservation(root->fs_info, "transaction",
     502             :                                               h->transid, num_bytes, 1);
     503       51824 :                 h->block_rsv = &root->fs_info->trans_block_rsv;
     504       51824 :                 h->bytes_reserved = num_bytes;
     505       51824 :                 h->reloc_reserved = reloc_reserved;
     506             :         }
     507      180981 :         h->qgroup_reserved = qgroup_reserved;
     508             : 
     509             : got_it:
     510      186071 :         btrfs_record_root_in_trans(h, root);
     511             : 
     512      186065 :         if (!current->journal_info && type != TRANS_USERSPACE)
     513      180976 :                 current->journal_info = h;
     514      186065 :         return h;
     515             : 
     516             : join_fail:
     517        1319 :         if (type & __TRANS_FREEZABLE)
     518           0 :                 sb_end_intwrite(root->fs_info->sb);
     519        1319 :         kmem_cache_free(btrfs_trans_handle_cachep, h);
     520             : alloc_fail:
     521        1312 :         if (num_bytes)
     522           0 :                 btrfs_block_rsv_release(root, &root->fs_info->trans_block_rsv,
     523             :                                         num_bytes);
     524             : reserve_fail:
     525        1319 :         if (qgroup_reserved)
     526           0 :                 btrfs_qgroup_free(root, qgroup_reserved);
     527        2638 :         return ERR_PTR(ret);
     528             : }
     529             : 
     530       57393 : struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
     531             :                                                    int num_items)
     532             : {
     533       57397 :         return start_transaction(root, num_items, TRANS_START,
     534             :                                  BTRFS_RESERVE_FLUSH_ALL);
     535             : }
     536             : 
     537           0 : struct btrfs_trans_handle *btrfs_start_transaction_lflush(
     538             :                                         struct btrfs_root *root, int num_items)
     539             : {
     540           0 :         return start_transaction(root, num_items, TRANS_START,
     541             :                                  BTRFS_RESERVE_FLUSH_LIMIT);
     542             : }
     543             : 
     544      119455 : struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root)
     545             : {
     546      119455 :         return start_transaction(root, 0, TRANS_JOIN, 0);
     547             : }
     548             : 
     549        8114 : struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root)
     550             : {
     551        8114 :         return start_transaction(root, 0, TRANS_JOIN_NOLOCK, 0);
     552             : }
     553             : 
     554           0 : struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root)
     555             : {
     556           0 :         return start_transaction(root, 0, TRANS_USERSPACE, 0);
     557             : }
     558             : 
     559             : /*
     560             :  * btrfs_attach_transaction() - catch the running transaction
     561             :  *
      562             :  * It is used when we want to commit the current transaction, but
     563             :  * don't want to start a new one.
     564             :  *
      565             :  * Note: If this function returns -ENOENT, it just means there is no
      566             :  * running transaction. But it is possible that the inactive transaction
      567             :  * is still in memory, not fully on disk. If you want to be sure there is
      568             :  * no inactive transaction in the fs when -ENOENT is returned, you should
      569             :  * invoke
      570             :  *     btrfs_attach_transaction_barrier()
     571             :  */
     572          18 : struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root)
     573             : {
     574          18 :         return start_transaction(root, 0, TRANS_ATTACH, 0);
     575             : }
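
/*
 * Editor's sketch (hypothetical caller, not part of the coverage data): a
 * typical user of btrfs_attach_transaction() treats -ENOENT as "nothing to
 * commit" and commits whatever it did catch.
 */
static int example_commit_if_running(struct btrfs_root *root)
{
        struct btrfs_trans_handle *trans;

        trans = btrfs_attach_transaction(root);
        if (IS_ERR(trans)) {
                /* no running transaction is not an error for this caller */
                if (PTR_ERR(trans) == -ENOENT)
                        return 0;
                return PTR_ERR(trans);
        }

        return btrfs_commit_transaction(trans, root);
}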
     576             : 
     577             : /*
     578             :  * btrfs_attach_transaction_barrier() - catch the running transaction
     579             :  *
      580             :  * It is similar to the above function; the difference is that this one
      581             :  * will wait for all the inactive transactions until they fully
      582             :  * complete.
     583             :  */
     584             : struct btrfs_trans_handle *
     585        2397 : btrfs_attach_transaction_barrier(struct btrfs_root *root)
     586             : {
     587             :         struct btrfs_trans_handle *trans;
     588             : 
     589        2397 :         trans = start_transaction(root, 0, TRANS_ATTACH, 0);
     590        3715 :         if (IS_ERR(trans) && PTR_ERR(trans) == -ENOENT)
     591        1317 :                 btrfs_wait_for_commit(root, 0);
     592             : 
     593        2397 :         return trans;
     594             : }
     595             : 
     596             : /* wait for a transaction commit to be fully complete */
     597         241 : static noinline void wait_for_commit(struct btrfs_root *root,
     598             :                                     struct btrfs_transaction *commit)
     599             : {
     600         482 :         wait_event(commit->commit_wait, commit->state == TRANS_STATE_COMPLETED);
     601         241 : }
     602             : 
     603        1315 : int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
     604             : {
     605             :         struct btrfs_transaction *cur_trans = NULL, *t;
     606             :         int ret = 0;
     607             : 
     608        1315 :         if (transid) {
     609           0 :                 if (transid <= root->fs_info->last_trans_committed)
     610             :                         goto out;
     611             : 
     612             :                 ret = -EINVAL;
     613             :                 /* find specified transaction */
     614             :                 spin_lock(&root->fs_info->trans_lock);
     615           0 :                 list_for_each_entry(t, &root->fs_info->trans_list, list) {
     616           0 :                         if (t->transid == transid) {
     617             :                                 cur_trans = t;
     618           0 :                                 atomic_inc(&cur_trans->use_count);
     619             :                                 ret = 0;
     620           0 :                                 break;
     621             :                         }
     622           0 :                         if (t->transid > transid) {
     623             :                                 ret = 0;
     624             :                                 break;
     625             :                         }
     626             :                 }
     627           0 :                 spin_unlock(&root->fs_info->trans_lock);
     628             :                 /* The specified transaction doesn't exist */
     629           0 :                 if (!cur_trans)
     630             :                         goto out;
     631             :         } else {
     632             :                 /* find newest transaction that is committing | committed */
     633        1315 :                 spin_lock(&root->fs_info->trans_lock);
     634        1322 :                 list_for_each_entry_reverse(t, &root->fs_info->trans_list,
     635             :                                             list) {
     636          95 :                         if (t->state >= TRANS_STATE_COMMIT_START) {
     637          90 :                                 if (t->state == TRANS_STATE_COMPLETED)
     638             :                                         break;
     639             :                                 cur_trans = t;
     640          90 :                                 atomic_inc(&cur_trans->use_count);
     641             :                                 break;
     642             :                         }
     643             :                 }
     644        1317 :                 spin_unlock(&root->fs_info->trans_lock);
     645        1316 :                 if (!cur_trans)
     646             :                         goto out;  /* nothing committing|committed */
     647             :         }
     648             : 
     649          90 :         wait_for_commit(root, cur_trans);
     650          90 :         btrfs_put_transaction(cur_trans);
     651             : out:
     652        1316 :         return ret;
     653             : }
     654             : 
     655        1276 : void btrfs_throttle(struct btrfs_root *root)
     656             : {
     657        2552 :         if (!atomic_read(&root->fs_info->open_ioctl_trans))
     658        1276 :                 wait_current_trans(root);
     659        1276 : }
     660             : 
     661      174837 : static int should_end_transaction(struct btrfs_trans_handle *trans,
     662             :                                   struct btrfs_root *root)
     663             : {
     664      174837 :         if (root->fs_info->global_block_rsv.space_info->full &&
     665           0 :             btrfs_check_space_for_delayed_refs(trans, root))
     666             :                 return 1;
     667             : 
     668      174837 :         return !!btrfs_block_rsv_check(root, &root->fs_info->global_block_rsv, 5);
     669             : }
     670             : 
     671          14 : int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
     672             :                                  struct btrfs_root *root)
     673             : {
     674          14 :         struct btrfs_transaction *cur_trans = trans->transaction;
     675             :         int updates;
     676             :         int err;
     677             : 
     678          14 :         smp_mb();
     679          28 :         if (cur_trans->state >= TRANS_STATE_BLOCKED ||
     680          14 :             cur_trans->delayed_refs.flushing)
     681             :                 return 1;
     682             : 
     683          14 :         updates = trans->delayed_ref_updates;
     684          14 :         trans->delayed_ref_updates = 0;
     685          14 :         if (updates) {
     686          12 :                 err = btrfs_run_delayed_refs(trans, root, updates);
     687          12 :                 if (err) /* Error code will also eval true */
     688             :                         return err;
     689             :         }
     690             : 
     691          14 :         return should_end_transaction(trans, root);
     692             : }
     693             : 
     694      183965 : static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
     695             :                           struct btrfs_root *root, int throttle)
     696             : {
     697      183965 :         struct btrfs_transaction *cur_trans = trans->transaction;
     698      183965 :         struct btrfs_fs_info *info = root->fs_info;
     699      183965 :         unsigned long cur = trans->delayed_ref_updates;
     700      183965 :         int lock = (trans->type != TRANS_JOIN_NOLOCK);
     701             :         int err = 0;
     702             :         int must_run_delayed_refs = 0;
     703             : 
     704      183965 :         if (trans->use_count > 1) {
     705        5090 :                 trans->use_count--;
     706        5090 :                 trans->block_rsv = trans->orig_rsv;
     707        5090 :                 return 0;
     708             :         }
     709             : 
     710      178875 :         btrfs_trans_release_metadata(trans, root);
     711      178864 :         trans->block_rsv = NULL;
     712             : 
     713      357728 :         if (!list_empty(&trans->new_bgs))
     714          65 :                 btrfs_create_pending_block_groups(trans, root);
     715             : 
     716      178868 :         trans->delayed_ref_updates = 0;
     717      178868 :         if (!trans->sync) {
     718      175224 :                 must_run_delayed_refs =
     719             :                         btrfs_should_throttle_delayed_refs(trans, root);
     720      175244 :                 cur = max_t(unsigned long, cur, 32);
     721             : 
     722             :                 /*
     723             :                  * don't make the caller wait if they are from a NOLOCK
     724             :                  * or ATTACH transaction, it will deadlock with commit
     725             :                  */
     726      175275 :                 if (must_run_delayed_refs == 1 &&
     727          31 :                     (trans->type & (__TRANS_JOIN_NOLOCK | __TRANS_ATTACH)))
     728             :                         must_run_delayed_refs = 2;
     729             :         }
     730             : 
     731      178888 :         if (trans->qgroup_reserved) {
     732             :                 /*
     733             :                  * the same root has to be passed here between start_transaction
     734             :                  * and end_transaction. Subvolume quota depends on this.
     735             :                  */
     736        6082 :                 btrfs_qgroup_free(trans->root, trans->qgroup_reserved);
     737        6082 :                 trans->qgroup_reserved = 0;
     738             :         }
     739             : 
     740      178888 :         btrfs_trans_release_metadata(trans, root);
     741      178883 :         trans->block_rsv = NULL;
     742             : 
     743      178883 :         if (!list_empty(&trans->new_bgs))
     744           0 :                 btrfs_create_pending_block_groups(trans, root);
     745             : 
     746      528537 :         if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) &&
     747      174828 :             should_end_transaction(trans, root) &&
     748           2 :             ACCESS_ONCE(cur_trans->state) == TRANS_STATE_RUNNING) {
     749             :                 spin_lock(&info->trans_lock);
     750           1 :                 if (cur_trans->state == TRANS_STATE_RUNNING)
     751           1 :                         cur_trans->state = TRANS_STATE_BLOCKED;
     752             :                 spin_unlock(&info->trans_lock);
     753             :         }
     754             : 
     755      178885 :         if (lock && ACCESS_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) {
     756           2 :                 if (throttle)
     757           0 :                         return btrfs_commit_transaction(trans, root);
     758             :                 else
     759           2 :                         wake_up_process(info->transaction_kthread);
     760             :         }
     761             : 
     762      178885 :         if (trans->type & __TRANS_FREEZABLE)
     763      174749 :                 sb_end_intwrite(root->fs_info->sb);
     764             : 
     765      178882 :         WARN_ON(cur_trans != info->running_transaction);
     766      178882 :         WARN_ON(atomic_read(&cur_trans->num_writers) < 1);
     767      178882 :         atomic_dec(&cur_trans->num_writers);
     768      178889 :         extwriter_counter_dec(cur_trans, trans->type);
     769             : 
     770      178889 :         smp_mb();
     771      178885 :         if (waitqueue_active(&cur_trans->writer_wait))
     772          82 :                 wake_up(&cur_trans->writer_wait);
     773      178885 :         btrfs_put_transaction(cur_trans);
     774             : 
     775      178884 :         if (current->journal_info == trans)
     776      178882 :                 current->journal_info = NULL;
     777             : 
     778      178884 :         if (throttle)
     779        3562 :                 btrfs_run_delayed_iputs(root);
     780             : 
     781      357768 :         if (trans->aborted ||
     782      178884 :             test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) {
     783           0 :                 wake_up_process(info->transaction_kthread);
     784             :                 err = -EIO;
     785             :         }
     786      178884 :         assert_qgroups_uptodate(trans);
     787             : 
     788      178879 :         kmem_cache_free(btrfs_trans_handle_cachep, trans);
     789      178869 :         if (must_run_delayed_refs) {
     790        2152 :                 btrfs_async_run_delayed_refs(root, cur,
     791             :                                              must_run_delayed_refs == 1);
     792             :         }
     793      178869 :         return err;
     794             : }
     795             : 
     796      180401 : int btrfs_end_transaction(struct btrfs_trans_handle *trans,
     797             :                           struct btrfs_root *root)
     798             : {
     799      180405 :         return __btrfs_end_transaction(trans, root, 0);
     800             : }
     801             : 
     802        3562 : int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
     803             :                                    struct btrfs_root *root)
     804             : {
     805        3562 :         return __btrfs_end_transaction(trans, root, 1);
     806             : }
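
/*
 * Editor's sketch (hypothetical caller, not part of the coverage data): the
 * usual pairing of the helpers above reserves space for a known number of
 * tree items, modifies the trees, and then drops the handle.
 */
static int example_update_item(struct btrfs_root *root)
{
        struct btrfs_trans_handle *trans;

        /* reserve metadata space for one item's worth of modifications */
        trans = btrfs_start_transaction(root, 1);
        if (IS_ERR(trans))
                return PTR_ERR(trans);

        /* ... modify tree items under this handle ... */

        return btrfs_end_transaction(trans, root);
}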
     807             : 
     808             : /*
     809             :  * when btree blocks are allocated, they have some corresponding bits set for
     810             :  * them in one of two extent_io trees.  This is used to make sure all of
     811             :  * those extents are sent to disk but does not wait on them
     812             :  */
     813        5052 : int btrfs_write_marked_extents(struct btrfs_root *root,
     814             :                                struct extent_io_tree *dirty_pages, int mark)
     815             : {
     816             :         int err = 0;
     817             :         int werr = 0;
     818        5052 :         struct address_space *mapping = root->fs_info->btree_inode->i_mapping;
     819        5052 :         struct extent_state *cached_state = NULL;
     820        5052 :         u64 start = 0;
     821             :         u64 end;
     822             : 
     823       30139 :         while (!find_first_extent_bit(dirty_pages, start, &start, &end,
     824             :                                       mark, &cached_state)) {
     825       20035 :                 convert_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT,
     826             :                                    mark, &cached_state, GFP_NOFS);
     827       20035 :                 cached_state = NULL;
     828       20035 :                 err = filemap_fdatawrite_range(mapping, start, end);
     829       20035 :                 if (err)
     830             :                         werr = err;
     831       20035 :                 cond_resched();
     832       20035 :                 start = end + 1;
     833             :         }
     834        5052 :         if (err)
     835             :                 werr = err;
     836        5052 :         return werr;
     837             : }
     838             : 
     839             : /*
     840             :  * when btree blocks are allocated, they have some corresponding bits set for
     841             :  * them in one of two extent_io trees.  This is used to make sure all of
     842             :  * those extents are on disk for transaction or log commit.  We wait
     843             :  * on all the pages and clear them from the dirty pages state tree
     844             :  */
     845        5052 : int btrfs_wait_marked_extents(struct btrfs_root *root,
     846             :                               struct extent_io_tree *dirty_pages, int mark)
     847             : {
     848             :         int err = 0;
     849             :         int werr = 0;
     850        5052 :         struct address_space *mapping = root->fs_info->btree_inode->i_mapping;
     851        5052 :         struct extent_state *cached_state = NULL;
     852        5052 :         u64 start = 0;
     853             :         u64 end;
     854             : 
     855       30139 :         while (!find_first_extent_bit(dirty_pages, start, &start, &end,
     856             :                                       EXTENT_NEED_WAIT, &cached_state)) {
     857       20035 :                 clear_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT,
     858             :                                  0, 0, &cached_state, GFP_NOFS);
     859       20035 :                 err = filemap_fdatawait_range(mapping, start, end);
     860       20035 :                 if (err)
     861             :                         werr = err;
     862       20035 :                 cond_resched();
     863       20035 :                 start = end + 1;
     864             :         }
     865        5052 :         if (err)
     866             :                 werr = err;
     867        5052 :         return werr;
     868             : }
     869             : 
     870             : /*
     871             :  * when btree blocks are allocated, they have some corresponding bits set for
     872             :  * them in one of two extent_io trees.  This is used to make sure all of
     873             :  * those extents are on disk for transaction or log commit
     874             :  */
     875        2098 : static int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
     876             :                                 struct extent_io_tree *dirty_pages, int mark)
     877             : {
     878             :         int ret;
     879             :         int ret2;
     880             :         struct blk_plug plug;
     881             : 
     882        2098 :         blk_start_plug(&plug);
     883        2098 :         ret = btrfs_write_marked_extents(root, dirty_pages, mark);
     884        2098 :         blk_finish_plug(&plug);
     885        2098 :         ret2 = btrfs_wait_marked_extents(root, dirty_pages, mark);
     886             : 
     887        2098 :         if (ret)
     888             :                 return ret;
     889        2098 :         if (ret2)
     890           0 :                 return ret2;
     891             :         return 0;
     892             : }
     893             : 
     894        2098 : int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
     895             :                                      struct btrfs_root *root)
     896             : {
     897        2098 :         if (!trans || !trans->transaction) {
     898             :                 struct inode *btree_inode;
     899           0 :                 btree_inode = root->fs_info->btree_inode;
     900           0 :                 return filemap_write_and_wait(btree_inode->i_mapping);
     901             :         }
     902        2098 :         return btrfs_write_and_wait_marked_extents(root,
     903             :                                            &trans->transaction->dirty_pages,
     904             :                                            EXTENT_DIRTY);
     905             : }
     906             : 
     907             : /*
     908             :  * this is used to update the root pointer in the tree of tree roots.
     909             :  *
     910             :  * But, in the case of the extent allocation tree, updating the root
     911             :  * pointer may allocate blocks which may change the root of the extent
     912             :  * allocation tree.
     913             :  *
     914             :  * So, this loops and repeats and makes sure the cowonly root didn't
     915             :  * change while the root pointer was being updated in the metadata.
     916             :  */
     917        4834 : static int update_cowonly_root(struct btrfs_trans_handle *trans,
     918             :                                struct btrfs_root *root)
     919             : {
     920             :         int ret;
     921             :         u64 old_root_bytenr;
     922             :         u64 old_root_used;
     923        4834 :         struct btrfs_root *tree_root = root->fs_info->tree_root;
     924             : 
     925             :         old_root_used = btrfs_root_used(&root->root_item);
     926        4834 :         btrfs_write_dirty_block_groups(trans, root);
     927             : 
     928             :         while (1) {
     929             :                 old_root_bytenr = btrfs_root_bytenr(&root->root_item);
     930       14498 :                 if (old_root_bytenr == root->node->start &&
     931             :                     old_root_used == btrfs_root_used(&root->root_item))
     932             :                         break;
     933             : 
     934        4829 :                 btrfs_set_root_node(&root->root_item, root->node);
     935        4829 :                 ret = btrfs_update_root(trans, tree_root,
     936             :                                         &root->root_key,
     937             :                                         &root->root_item);
     938        4829 :                 if (ret)
     939             :                         return ret;
     940             : 
     941             :                 old_root_used = btrfs_root_used(&root->root_item);
     942        4829 :                 ret = btrfs_write_dirty_block_groups(trans, root);
     943        4829 :                 if (ret)
     944             :                         return ret;
     945             :         }
     946             : 
     947             :         return 0;
     948             : }
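
The loop in update_cowonly_root is a fixed-point update: recording the new root location can itself dirty the tree and move the root again, so it repeats until the recorded location and usage stop changing. A hedged, userspace-only sketch of the same shape; the struct tree and its relocate-on-record behaviour are invented for illustration:

    #include <stdio.h>

    /* Toy tree whose "record the root" step may move the root again. */
    struct tree {
            unsigned long root_bytenr;   /* where the root currently lives */
            unsigned long recorded;      /* what we last wrote into the metadata */
            int moves_left;              /* pretend a few updates relocate the root */
    };

    static void record_root(struct tree *t)
    {
            t->recorded = t->root_bytenr;
            if (t->moves_left-- > 0)
                    t->root_bytenr += 4096;   /* recording it dirtied the tree again */
    }

    /* Repeat until the recorded location matches the live one. */
    static void update_until_stable(struct tree *t)
    {
            while (t->recorded != t->root_bytenr)
                    record_root(t);
    }

    int main(void)
    {
            struct tree t = { .root_bytenr = 8192, .recorded = 0, .moves_left = 3 };

            update_until_stable(&t);
            printf("stable root at %lu\n", t.root_bytenr);
            return 0;
    }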
     949             : 
     950             : /*
     951             :  * update all the cowonly tree roots on disk
     952             :  *
     953             :  * The error handling in this function may not be obvious. Any of the
     954             :  * failures will cause the file system to go offline. We still need
     955             :  * to clean up the delayed refs.
     956             :  */
     957        2098 : static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
     958             :                                          struct btrfs_root *root)
     959             : {
     960        2098 :         struct btrfs_fs_info *fs_info = root->fs_info;
     961             :         struct list_head *next;
     962             :         struct extent_buffer *eb;
     963             :         int ret;
     964             : 
     965        2098 :         ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
     966        2098 :         if (ret)
     967             :                 return ret;
     968             : 
     969        2098 :         eb = btrfs_lock_root_node(fs_info->tree_root);
     970        2098 :         ret = btrfs_cow_block(trans, fs_info->tree_root, eb, NULL,
     971             :                               0, &eb);
     972        2098 :         btrfs_tree_unlock(eb);
     973        2098 :         free_extent_buffer(eb);
     974             : 
     975        2098 :         if (ret)
     976             :                 return ret;
     977             : 
     978        2098 :         ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
     979        2098 :         if (ret)
     980             :                 return ret;
     981             : 
     982        2098 :         ret = btrfs_run_dev_stats(trans, root->fs_info);
     983        2098 :         if (ret)
     984             :                 return ret;
     985        2098 :         ret = btrfs_run_dev_replace(trans, root->fs_info);
     986        2098 :         if (ret)
     987             :                 return ret;
     988        2098 :         ret = btrfs_run_qgroups(trans, root->fs_info);
     989        2098 :         if (ret)
     990             :                 return ret;
     991             : 
     992             :         /* run_qgroups might have added some more refs */
     993        2098 :         ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
     994        2098 :         if (ret)
     995             :                 return ret;
     996             : 
     997       13864 :         while (!list_empty(&fs_info->dirty_cowonly_roots)) {
     998             :                 next = fs_info->dirty_cowonly_roots.next;
     999             :                 list_del_init(next);
    1000        4834 :                 root = list_entry(next, struct btrfs_root, dirty_list);
    1001             : 
    1002        4834 :                 if (root != fs_info->extent_root)
    1003        2729 :                         list_add_tail(&root->dirty_list,
    1004        2729 :                                       &trans->transaction->switch_commits);
    1005        4834 :                 ret = update_cowonly_root(trans, root);
    1006        4834 :                 if (ret)
    1007             :                         return ret;
    1008             :         }
    1009             : 
    1010        2098 :         list_add_tail(&fs_info->extent_root->dirty_list,
    1011        2098 :                       &trans->transaction->switch_commits);
    1012        2098 :         btrfs_after_dev_replace_commit(fs_info);
    1013             : 
    1014        2098 :         return 0;
    1015             : }
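
commit_cowonly_roots drains fs_info->dirty_cowonly_roots by popping the head until the list is empty, rather than walking a snapshot of it, because updating one root can dirty another and put it back on the list. A small sketch of that drain pattern using an invented linked list (not the kernel list API):

    #include <stdio.h>
    #include <stdlib.h>

    struct item {
            int id;
            struct item *next;
    };

    static struct item *head;

    static void push(int id)
    {
            struct item *it = malloc(sizeof(*it));
            it->id = id;
            it->next = head;
            head = it;
    }

    /* Processing an item may queue more work (here: item 1 re-queues item 3). */
    static void process(struct item *it)
    {
            printf("processing %d\n", it->id);
            if (it->id == 1)
                    push(3);
    }

    int main(void)
    {
            push(1);
            push(2);

            /* Re-test emptiness on every pass instead of iterating a snapshot. */
            while (head) {
                    struct item *it = head;
                    head = it->next;
                    process(it);
                    free(it);
            }
            return 0;
    }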
    1016             : 
    1017             : /*
    1018             :  * dead roots are old snapshots that need to be deleted.  This allocates
    1019             :  * a dirty root struct and adds it into the list of dead roots that need to
    1020             :  * be deleted
    1021             :  */
    1022         105 : void btrfs_add_dead_root(struct btrfs_root *root)
    1023             : {
    1024         105 :         spin_lock(&root->fs_info->trans_lock);
    1025         210 :         if (list_empty(&root->root_list))
    1026         105 :                 list_add_tail(&root->root_list, &root->fs_info->dead_roots);
    1027         105 :         spin_unlock(&root->fs_info->trans_lock);
    1028         105 : }
    1029             : 
    1030             : /*
     1031             :  * write out all the dirty fs-tree (subvolume) roots modified in this transaction
    1032             :  */
    1033        2098 : static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
    1034             :                                     struct btrfs_root *root)
    1035             : {
    1036             :         struct btrfs_root *gang[8];
    1037        2098 :         struct btrfs_fs_info *fs_info = root->fs_info;
    1038             :         int i;
    1039             :         int ret;
    1040             :         int err = 0;
    1041             : 
    1042             :         spin_lock(&fs_info->fs_roots_radix_lock);
    1043             :         while (1) {
    1044        3707 :                 ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix,
    1045             :                                                  (void **)gang, 0,
    1046             :                                                  ARRAY_SIZE(gang),
    1047             :                                                  BTRFS_ROOT_TRANS_TAG);
    1048        3707 :                 if (ret == 0)
    1049             :                         break;
    1050        2548 :                 for (i = 0; i < ret; i++) {
    1051        2548 :                         root = gang[i];
    1052        2548 :                         radix_tree_tag_clear(&fs_info->fs_roots_radix,
    1053        2548 :                                         (unsigned long)root->root_key.objectid,
    1054             :                                         BTRFS_ROOT_TRANS_TAG);
    1055             :                         spin_unlock(&fs_info->fs_roots_radix_lock);
    1056             : 
    1057        2548 :                         btrfs_free_log(trans, root);
    1058        2548 :                         btrfs_update_reloc_root(trans, root);
    1059        2548 :                         btrfs_orphan_commit_root(trans, root);
    1060             : 
    1061        2548 :                         btrfs_save_ino_cache(root, trans);
    1062             : 
    1063             :                         /* see comments in should_cow_block() */
    1064             :                         clear_bit(BTRFS_ROOT_FORCE_COW, &root->state);
    1065        2548 :                         smp_mb__after_atomic();
    1066             : 
    1067        2548 :                         if (root->commit_root != root->node) {
    1068        2002 :                                 list_add_tail(&root->dirty_list,
    1069        2002 :                                         &trans->transaction->switch_commits);
    1070        2002 :                                 btrfs_set_root_node(&root->root_item,
    1071             :                                                     root->node);
    1072             :                         }
    1073             : 
    1074        2548 :                         err = btrfs_update_root(trans, fs_info->tree_root,
    1075             :                                                 &root->root_key,
    1076             :                                                 &root->root_item);
    1077             :                         spin_lock(&fs_info->fs_roots_radix_lock);
    1078        2548 :                         if (err)
    1079             :                                 break;
    1080             :                 }
    1081             :         }
    1082             :         spin_unlock(&fs_info->fs_roots_radix_lock);
    1083        2098 :         return err;
    1084             : }
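
commit_fs_roots looks up dirty roots by tag in batches of eight, clears each root's tag while still holding fs_roots_radix_lock, then drops the lock for the per-root work and re-takes it before the next lookup. A rough userspace analogue of that lock-drop iteration, with a plain array and a pthread mutex standing in for the tagged radix tree, and one item per pass instead of a batch of eight:

    #include <pthread.h>
    #include <stdio.h>

    #define NR_SLOTS 8

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static int tagged[NR_SLOTS] = { 1, 0, 1, 1, 0, 0, 1, 0 };  /* "dirty" roots */

    static void process_slot(int i)
    {
            /* Per-item work runs without the lookup lock held. */
            printf("committing slot %d\n", i);
    }

    int main(void)
    {
            pthread_mutex_lock(&lock);
            for (;;) {
                    int found = -1, i;

                    /* Find the next tagged slot and clear its tag under the lock. */
                    for (i = 0; i < NR_SLOTS; i++) {
                            if (tagged[i]) {
                                    tagged[i] = 0;
                                    found = i;
                                    break;
                            }
                    }
                    if (found < 0)
                            break;

                    pthread_mutex_unlock(&lock);
                    process_slot(found);
                    pthread_mutex_lock(&lock);
            }
            pthread_mutex_unlock(&lock);
            return 0;
    }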
    1085             : 
    1086             : /*
    1087             :  * defrag a given btree.
    1088             :  * Every leaf in the btree is read and defragged.
    1089             :  */
    1090           4 : int btrfs_defrag_root(struct btrfs_root *root)
    1091             : {
    1092           4 :         struct btrfs_fs_info *info = root->fs_info;
    1093             :         struct btrfs_trans_handle *trans;
    1094             :         int ret;
    1095             : 
    1096           8 :         if (test_and_set_bit(BTRFS_ROOT_DEFRAG_RUNNING, &root->state))
    1097             :                 return 0;
    1098             : 
    1099             :         while (1) {
    1100             :                 trans = btrfs_start_transaction(root, 0);
    1101           4 :                 if (IS_ERR(trans))
    1102           0 :                         return PTR_ERR(trans);
    1103             : 
    1104           4 :                 ret = btrfs_defrag_leaves(trans, root);
    1105             : 
    1106             :                 btrfs_end_transaction(trans, root);
    1107           4 :                 btrfs_btree_balance_dirty(info->tree_root);
    1108           4 :                 cond_resched();
    1109             : 
    1110           8 :                 if (btrfs_fs_closing(root->fs_info) || ret != -EAGAIN)
    1111             :                         break;
    1112             : 
    1113           0 :                 if (btrfs_defrag_cancelled(root->fs_info)) {
    1114           0 :                         pr_debug("BTRFS: defrag_root cancelled\n");
    1115             :                         ret = -EAGAIN;
    1116             :                         break;
    1117             :                 }
    1118             :         }
    1119             :         clear_bit(BTRFS_ROOT_DEFRAG_RUNNING, &root->state);
    1120           4 :         return ret;
    1121             : }
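
btrfs_defrag_root does one short transaction per pass and only loops while the leaf walk reports -EAGAIN and nothing has asked it to stop. A minimal sketch of that retry shape, with hypothetical defrag_pass()/cancelled() helpers standing in for btrfs_defrag_leaves() and btrfs_defrag_cancelled():

    #include <errno.h>
    #include <stdio.h>

    static int passes_left = 3;

    static int defrag_pass(void)
    {
            /* Pretend each pass defrags one chunk and asks to be called again. */
            return --passes_left > 0 ? -EAGAIN : 0;
    }

    static int cancelled(void)
    {
            return 0;
    }

    int main(void)
    {
            int ret;

            while (1) {
                    ret = defrag_pass();           /* one bounded unit of work */
                    if (ret != -EAGAIN)
                            break;                 /* done, or a real error */
                    if (cancelled()) {
                            ret = -EAGAIN;
                            break;
                    }
            }
            printf("defrag finished with %d\n", ret);
            return 0;
    }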
    1122             : 
    1123             : /*
    1124             :  * new snapshots need to be created at a very specific time in the
    1125             :  * transaction commit.  This does the actual creation.
    1126             :  *
    1127             :  * Note:
     1128             :  * If an error occurs that could affect the commit of the current transaction,
     1129             :  * return that error number. If an error only affects the creation of this
     1130             :  * pending snapshot, record it in pending->error and return 0.
    1131             :  */
    1132         146 : static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
    1133             :                                    struct btrfs_fs_info *fs_info,
    1134             :                                    struct btrfs_pending_snapshot *pending)
    1135             : {
    1136             :         struct btrfs_key key;
    1137             :         struct btrfs_root_item *new_root_item;
    1138         146 :         struct btrfs_root *tree_root = fs_info->tree_root;
    1139         146 :         struct btrfs_root *root = pending->root;
    1140             :         struct btrfs_root *parent_root;
    1141             :         struct btrfs_block_rsv *rsv;
    1142             :         struct inode *parent_inode;
    1143             :         struct btrfs_path *path;
    1144             :         struct btrfs_dir_item *dir_item;
    1145             :         struct dentry *dentry;
    1146             :         struct extent_buffer *tmp;
    1147             :         struct extent_buffer *old;
    1148         146 :         struct timespec cur_time = CURRENT_TIME;
    1149             :         int ret = 0;
    1150         146 :         u64 to_reserve = 0;
    1151         146 :         u64 index = 0;
    1152             :         u64 objectid;
    1153             :         u64 root_flags;
    1154             :         uuid_le new_uuid;
    1155             : 
    1156         146 :         path = btrfs_alloc_path();
    1157         146 :         if (!path) {
    1158           0 :                 pending->error = -ENOMEM;
    1159           0 :                 return 0;
    1160             :         }
    1161             : 
    1162             :         new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS);
    1163         146 :         if (!new_root_item) {
    1164           0 :                 pending->error = -ENOMEM;
    1165           0 :                 goto root_item_alloc_fail;
    1166             :         }
    1167             : 
    1168         146 :         pending->error = btrfs_find_free_objectid(tree_root, &objectid);
    1169         146 :         if (pending->error)
    1170             :                 goto no_free_objectid;
    1171             : 
    1172         146 :         btrfs_reloc_pre_snapshot(trans, pending, &to_reserve);
    1173             : 
    1174         146 :         if (to_reserve > 0) {
    1175           0 :                 pending->error = btrfs_block_rsv_add(root,
    1176             :                                                      &pending->block_rsv,
    1177             :                                                      to_reserve,
    1178             :                                                      BTRFS_RESERVE_NO_FLUSH);
    1179           0 :                 if (pending->error)
    1180             :                         goto no_free_objectid;
    1181             :         }
    1182             : 
    1183         146 :         key.objectid = objectid;
    1184         146 :         key.offset = (u64)-1;
    1185         146 :         key.type = BTRFS_ROOT_ITEM_KEY;
    1186             : 
    1187         146 :         rsv = trans->block_rsv;
    1188         146 :         trans->block_rsv = &pending->block_rsv;
    1189         146 :         trans->bytes_reserved = trans->block_rsv->reserved;
    1190             : 
    1191         146 :         dentry = pending->dentry;
    1192         146 :         parent_inode = pending->dir;
    1193         146 :         parent_root = BTRFS_I(parent_inode)->root;
    1194         146 :         record_root_in_trans(trans, parent_root);
    1195             : 
    1196             :         /*
    1197             :          * insert the directory item
    1198             :          */
    1199         146 :         ret = btrfs_set_inode_index(parent_inode, &index);
    1200         146 :         BUG_ON(ret); /* -ENOMEM */
    1201             : 
    1202             :         /* check if there is a file/dir which has the same name. */
    1203         292 :         dir_item = btrfs_lookup_dir_item(NULL, parent_root, path,
    1204             :                                          btrfs_ino(parent_inode),
    1205         146 :                                          dentry->d_name.name,
    1206         146 :                                          dentry->d_name.len, 0);
    1207         146 :         if (dir_item != NULL && !IS_ERR(dir_item)) {
    1208           0 :                 pending->error = -EEXIST;
    1209           0 :                 goto dir_item_existed;
    1210         146 :         } else if (IS_ERR(dir_item)) {
    1211           0 :                 ret = PTR_ERR(dir_item);
    1212           0 :                 btrfs_abort_transaction(trans, root, ret);
    1213           0 :                 goto fail;
    1214             :         }
    1215         146 :         btrfs_release_path(path);
    1216             : 
    1217             :         /*
     1218             :          * pull in the delayed directory update
     1219             :          * and the delayed inode item,
     1220             :          * otherwise we would corrupt the FS
     1221             :          * during the snapshot
    1222             :          */
    1223         146 :         ret = btrfs_run_delayed_items(trans, root);
    1224         146 :         if (ret) {      /* Transaction aborted */
    1225           0 :                 btrfs_abort_transaction(trans, root, ret);
    1226           0 :                 goto fail;
    1227             :         }
    1228             : 
    1229         146 :         record_root_in_trans(trans, root);
    1230         146 :         btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
    1231         146 :         memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
    1232         146 :         btrfs_check_and_init_root_item(new_root_item);
    1233             : 
    1234             :         root_flags = btrfs_root_flags(new_root_item);
    1235         146 :         if (pending->readonly)
    1236          93 :                 root_flags |= BTRFS_ROOT_SUBVOL_RDONLY;
    1237             :         else
    1238          53 :                 root_flags &= ~BTRFS_ROOT_SUBVOL_RDONLY;
    1239             :         btrfs_set_root_flags(new_root_item, root_flags);
    1240             : 
    1241         146 :         btrfs_set_root_generation_v2(new_root_item,
    1242             :                         trans->transid);
    1243         146 :         uuid_le_gen(&new_uuid);
    1244         146 :         memcpy(new_root_item->uuid, new_uuid.b, BTRFS_UUID_SIZE);
    1245         146 :         memcpy(new_root_item->parent_uuid, root->root_item.uuid,
    1246             :                         BTRFS_UUID_SIZE);
    1247         146 :         if (!(root_flags & BTRFS_ROOT_SUBVOL_RDONLY)) {
    1248          53 :                 memset(new_root_item->received_uuid, 0,
    1249             :                        sizeof(new_root_item->received_uuid));
    1250          53 :                 memset(&new_root_item->stime, 0, sizeof(new_root_item->stime));
    1251          53 :                 memset(&new_root_item->rtime, 0, sizeof(new_root_item->rtime));
    1252             :                 btrfs_set_root_stransid(new_root_item, 0);
    1253             :                 btrfs_set_root_rtransid(new_root_item, 0);
    1254             :         }
    1255         146 :         btrfs_set_stack_timespec_sec(&new_root_item->otime, cur_time.tv_sec);
    1256         146 :         btrfs_set_stack_timespec_nsec(&new_root_item->otime, cur_time.tv_nsec);
    1257         146 :         btrfs_set_root_otransid(new_root_item, trans->transid);
    1258             : 
    1259         146 :         old = btrfs_lock_root_node(root);
    1260         146 :         ret = btrfs_cow_block(trans, root, old, NULL, 0, &old);
    1261         146 :         if (ret) {
    1262           0 :                 btrfs_tree_unlock(old);
    1263           0 :                 free_extent_buffer(old);
    1264           0 :                 btrfs_abort_transaction(trans, root, ret);
    1265           0 :                 goto fail;
    1266             :         }
    1267             : 
    1268         146 :         btrfs_set_lock_blocking(old);
    1269             : 
    1270         146 :         ret = btrfs_copy_root(trans, root, old, &tmp, objectid);
    1271             :         /* clean up in any case */
    1272         146 :         btrfs_tree_unlock(old);
    1273         146 :         free_extent_buffer(old);
    1274         146 :         if (ret) {
    1275           0 :                 btrfs_abort_transaction(trans, root, ret);
    1276           0 :                 goto fail;
    1277             :         }
    1278             : 
    1279             :         /*
    1280             :          * We need to flush delayed refs in order to make sure all of our quota
    1281             :          * operations have been done before we call btrfs_qgroup_inherit.
    1282             :          */
    1283         146 :         ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
    1284         146 :         if (ret) {
    1285           0 :                 btrfs_abort_transaction(trans, root, ret);
    1286           0 :                 goto fail;
    1287             :         }
    1288             : 
    1289         146 :         ret = btrfs_qgroup_inherit(trans, fs_info,
    1290             :                                    root->root_key.objectid,
    1291             :                                    objectid, pending->inherit);
    1292         146 :         if (ret) {
    1293           0 :                 btrfs_abort_transaction(trans, root, ret);
    1294           0 :                 goto fail;
    1295             :         }
    1296             : 
    1297             :         /* see comments in should_cow_block() */
    1298             :         set_bit(BTRFS_ROOT_FORCE_COW, &root->state);
    1299         146 :         smp_wmb();
    1300             : 
    1301         146 :         btrfs_set_root_node(new_root_item, tmp);
    1302             :         /* record when the snapshot was created in key.offset */
    1303         146 :         key.offset = trans->transid;
    1304         146 :         ret = btrfs_insert_root(trans, tree_root, &key, new_root_item);
    1305         146 :         btrfs_tree_unlock(tmp);
    1306         146 :         free_extent_buffer(tmp);
    1307         146 :         if (ret) {
    1308           0 :                 btrfs_abort_transaction(trans, root, ret);
    1309           0 :                 goto fail;
    1310             :         }
    1311             : 
    1312             :         /*
    1313             :          * insert root back/forward references
    1314             :          */
    1315         438 :         ret = btrfs_add_root_ref(trans, tree_root, objectid,
    1316             :                                  parent_root->root_key.objectid,
    1317             :                                  btrfs_ino(parent_inode), index,
    1318         292 :                                  dentry->d_name.name, dentry->d_name.len);
    1319         146 :         if (ret) {
    1320           0 :                 btrfs_abort_transaction(trans, root, ret);
    1321           0 :                 goto fail;
    1322             :         }
    1323             : 
    1324         146 :         key.offset = (u64)-1;
    1325         292 :         pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key);
    1326         146 :         if (IS_ERR(pending->snap)) {
    1327           0 :                 ret = PTR_ERR(pending->snap);
    1328           0 :                 btrfs_abort_transaction(trans, root, ret);
    1329           0 :                 goto fail;
    1330             :         }
    1331             : 
    1332         146 :         ret = btrfs_reloc_post_snapshot(trans, pending);
    1333         146 :         if (ret) {
    1334           0 :                 btrfs_abort_transaction(trans, root, ret);
    1335           0 :                 goto fail;
    1336             :         }
    1337             : 
    1338         146 :         ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
    1339         146 :         if (ret) {
    1340           0 :                 btrfs_abort_transaction(trans, root, ret);
    1341           0 :                 goto fail;
    1342             :         }
    1343             : 
    1344         438 :         ret = btrfs_insert_dir_item(trans, parent_root,
    1345         292 :                                     dentry->d_name.name, dentry->d_name.len,
    1346             :                                     parent_inode, &key,
    1347             :                                     BTRFS_FT_DIR, index);
     1348             :         /* We checked the name at the beginning, so these errors are impossible. */
    1349         146 :         BUG_ON(ret == -EEXIST || ret == -EOVERFLOW);
    1350         146 :         if (ret) {
    1351           0 :                 btrfs_abort_transaction(trans, root, ret);
    1352           0 :                 goto fail;
    1353             :         }
    1354             : 
    1355         292 :         btrfs_i_size_write(parent_inode, parent_inode->i_size +
    1356         146 :                                          dentry->d_name.len * 2);
    1357         146 :         parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
    1358         146 :         ret = btrfs_update_inode_fallback(trans, parent_root, parent_inode);
    1359         146 :         if (ret) {
    1360           0 :                 btrfs_abort_transaction(trans, root, ret);
    1361           0 :                 goto fail;
    1362             :         }
    1363         146 :         ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root, new_uuid.b,
    1364             :                                   BTRFS_UUID_KEY_SUBVOL, objectid);
    1365         146 :         if (ret) {
    1366           0 :                 btrfs_abort_transaction(trans, root, ret);
    1367           0 :                 goto fail;
    1368             :         }
    1369         146 :         if (!btrfs_is_empty_uuid(new_root_item->received_uuid)) {
    1370           0 :                 ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root,
    1371             :                                           new_root_item->received_uuid,
    1372             :                                           BTRFS_UUID_KEY_RECEIVED_SUBVOL,
    1373             :                                           objectid);
    1374           0 :                 if (ret && ret != -EEXIST) {
    1375           0 :                         btrfs_abort_transaction(trans, root, ret);
    1376           0 :                         goto fail;
    1377             :                 }
    1378             :         }
    1379             : fail:
    1380         146 :         pending->error = ret;
    1381             : dir_item_existed:
    1382         146 :         trans->block_rsv = rsv;
    1383         146 :         trans->bytes_reserved = 0;
    1384             : no_free_objectid:
    1385         146 :         kfree(new_root_item);
    1386             : root_item_alloc_fail:
    1387         146 :         btrfs_free_path(path);
    1388         146 :         return ret;
    1389             : }
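
The Note above distinguishes two error channels: failures that only doom this one snapshot are parked in pending->error (and the function returns 0), while failures that endanger the whole commit are returned to the caller. A schematic sketch of that convention, with an invented pending struct and invented failure points:

    #include <errno.h>
    #include <stdio.h>

    struct pending {
            int error;   /* reported only to the caller that asked for this snapshot */
    };

    static int reserve_space(void)  { return 0; }   /* snapshot-local step */
    static int touch_metadata(void) { return 0; }   /* affects the whole commit */

    static int create_one(struct pending *p)
    {
            int ret;

            ret = reserve_space();
            if (ret) {
                    p->error = ret;   /* only this snapshot fails */
                    return 0;         /* the commit itself can continue */
            }

            ret = touch_metadata();
            if (ret)
                    return ret;       /* commit-threatening: propagate */

            return 0;
    }

    int main(void)
    {
            struct pending p = { 0 };
            int ret = create_one(&p);

            printf("commit-level ret=%d, pending->error=%d\n", ret, p.error);
            return 0;
    }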
    1390             : 
    1391             : /*
    1392             :  * create all the snapshots we've scheduled for creation
    1393             :  */
    1394        2098 : static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
    1395             :                                              struct btrfs_fs_info *fs_info)
    1396             : {
    1397             :         struct btrfs_pending_snapshot *pending, *next;
    1398        2098 :         struct list_head *head = &trans->transaction->pending_snapshots;
    1399             :         int ret = 0;
    1400             : 
    1401        2244 :         list_for_each_entry_safe(pending, next, head, list) {
    1402         146 :                 list_del(&pending->list);
    1403         146 :                 ret = create_pending_snapshot(trans, fs_info, pending);
    1404         146 :                 if (ret)
    1405             :                         break;
    1406             :         }
    1407        2098 :         return ret;
    1408             : }
    1409             : 
    1410        2098 : static void update_super_roots(struct btrfs_root *root)
    1411             : {
    1412             :         struct btrfs_root_item *root_item;
    1413             :         struct btrfs_super_block *super;
    1414             : 
    1415        2098 :         super = root->fs_info->super_copy;
    1416             : 
    1417        2098 :         root_item = &root->fs_info->chunk_root->root_item;
    1418        2098 :         super->chunk_root = root_item->bytenr;
    1419        2098 :         super->chunk_root_generation = root_item->generation;
    1420        2098 :         super->chunk_root_level = root_item->level;
    1421             : 
    1422        2098 :         root_item = &root->fs_info->tree_root->root_item;
    1423        2098 :         super->root = root_item->bytenr;
    1424        2098 :         super->generation = root_item->generation;
    1425        2098 :         super->root_level = root_item->level;
    1426        2098 :         if (btrfs_test_opt(root, SPACE_CACHE))
    1427        2098 :                 super->cache_generation = root_item->generation;
    1428        2098 :         if (root->fs_info->update_uuid_tree_gen)
    1429        1650 :                 super->uuid_tree_generation = root_item->generation;
    1430        2098 : }
    1431             : 
    1432      154341 : int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
    1433             : {
    1434             :         struct btrfs_transaction *trans;
    1435             :         int ret = 0;
    1436             : 
    1437             :         spin_lock(&info->trans_lock);
    1438      154345 :         trans = info->running_transaction;
    1439      154345 :         if (trans)
    1440      144759 :                 ret = (trans->state >= TRANS_STATE_COMMIT_START);
    1441             :         spin_unlock(&info->trans_lock);
    1442      154345 :         return ret;
    1443             : }
    1444             : 
    1445         303 : int btrfs_transaction_blocked(struct btrfs_fs_info *info)
    1446             : {
    1447          48 :         struct btrfs_transaction *trans;
    1448             :         int ret = 0;
    1449             : 
    1450             :         spin_lock(&info->trans_lock);
    1451         303 :         trans = info->running_transaction;
    1452         303 :         if (trans)
    1453             :                 ret = is_transaction_blocked(trans);
    1454             :         spin_unlock(&info->trans_lock);
    1455         303 :         return ret;
    1456             : }
    1457             : 
    1458             : /*
    1459             :  * wait for the current transaction commit to start and block subsequent
    1460             :  * transaction joins
    1461             :  */
    1462           0 : static void wait_current_trans_commit_start(struct btrfs_root *root,
    1463             :                                             struct btrfs_transaction *trans)
    1464             : {
    1465           0 :         wait_event(root->fs_info->transaction_blocked_wait,
    1466             :                    trans->state >= TRANS_STATE_COMMIT_START ||
    1467             :                    trans->aborted);
    1468           0 : }
    1469             : 
    1470             : /*
    1471             :  * wait for the current transaction to start and then become unblocked.
    1472             :  * caller holds ref.
    1473             :  */
    1474           0 : static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root,
    1475             :                                          struct btrfs_transaction *trans)
    1476             : {
    1477           0 :         wait_event(root->fs_info->transaction_wait,
    1478             :                    trans->state >= TRANS_STATE_UNBLOCKED ||
    1479             :                    trans->aborted);
    1480           0 : }
    1481             : 
    1482             : /*
    1483             :  * commit transactions asynchronously. once btrfs_commit_transaction_async
    1484             :  * returns, any subsequent transaction will not be allowed to join.
    1485             :  */
    1486             : struct btrfs_async_commit {
    1487             :         struct btrfs_trans_handle *newtrans;
    1488             :         struct btrfs_root *root;
    1489             :         struct work_struct work;
    1490             : };
    1491             : 
    1492           0 : static void do_async_commit(struct work_struct *work)
    1493             : {
    1494           0 :         struct btrfs_async_commit *ac =
    1495             :                 container_of(work, struct btrfs_async_commit, work);
    1496             : 
    1497             :         /*
    1498             :          * We've got freeze protection passed with the transaction.
    1499             :          * Tell lockdep about it.
    1500             :          */
    1501             :         if (ac->newtrans->type & __TRANS_FREEZABLE)
    1502             :                 rwsem_acquire_read(
    1503             :                      &ac->root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1],
    1504             :                      0, 1, _THIS_IP_);
    1505             : 
    1506           0 :         current->journal_info = ac->newtrans;
    1507             : 
    1508           0 :         btrfs_commit_transaction(ac->newtrans, ac->root);
    1509           0 :         kfree(ac);
    1510           0 : }
    1511             : 
    1512           0 : int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
    1513             :                                    struct btrfs_root *root,
    1514             :                                    int wait_for_unblock)
    1515             : {
    1516             :         struct btrfs_async_commit *ac;
    1517             :         struct btrfs_transaction *cur_trans;
    1518             : 
    1519             :         ac = kmalloc(sizeof(*ac), GFP_NOFS);
    1520           0 :         if (!ac)
    1521             :                 return -ENOMEM;
    1522             : 
    1523           0 :         INIT_WORK(&ac->work, do_async_commit);
    1524           0 :         ac->root = root;
    1525           0 :         ac->newtrans = btrfs_join_transaction(root);
    1526           0 :         if (IS_ERR(ac->newtrans)) {
    1527           0 :                 int err = PTR_ERR(ac->newtrans);
    1528           0 :                 kfree(ac);
    1529           0 :                 return err;
    1530             :         }
    1531             : 
    1532             :         /* take transaction reference */
    1533           0 :         cur_trans = trans->transaction;
    1534           0 :         atomic_inc(&cur_trans->use_count);
    1535             : 
    1536             :         btrfs_end_transaction(trans, root);
    1537             : 
    1538             :         /*
    1539             :          * Tell lockdep we've released the freeze rwsem, since the
    1540             :          * async commit thread will be the one to unlock it.
    1541             :          */
    1542             :         if (ac->newtrans->type & __TRANS_FREEZABLE)
    1543             :                 rwsem_release(
    1544             :                         &root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1],
    1545             :                         1, _THIS_IP_);
    1546             : 
    1547           0 :         schedule_work(&ac->work);
    1548             : 
    1549             :         /* wait for transaction to start and unblock */
    1550           0 :         if (wait_for_unblock)
    1551           0 :                 wait_current_trans_commit_start_and_unblock(root, cur_trans);
    1552             :         else
    1553           0 :                 wait_current_trans_commit_start(root, cur_trans);
    1554             : 
    1555           0 :         if (current->journal_info == trans)
    1556           0 :                 current->journal_info = NULL;
    1557             : 
    1558           0 :         btrfs_put_transaction(cur_trans);
    1559           0 :         return 0;
    1560             : }
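
btrfs_commit_transaction_async hands the real commit to a worker and then waits only for the transaction to reach the requested milestone (commit started, or unblocked) instead of waiting for the whole commit. A hedged pthread sketch of that "kick off the work, wait for a state, return early" shape; the state names and helpers below are invented, not btrfs APIs:

    #include <pthread.h>
    #include <stdio.h>

    enum state { RUNNING, COMMIT_START, COMPLETED };

    static enum state state = RUNNING;
    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;

    static void set_state(enum state s)
    {
            pthread_mutex_lock(&lock);
            state = s;
            pthread_cond_broadcast(&cond);
            pthread_mutex_unlock(&lock);
    }

    static void *commit_worker(void *arg)
    {
            set_state(COMMIT_START);   /* new joiners would now be blocked */
            /* ... the long part of the commit happens here ... */
            set_state(COMPLETED);
            return NULL;
    }

    /* Return as soon as the commit has started, not when it has finished. */
    static void commit_async(void)
    {
            pthread_t worker;

            pthread_create(&worker, NULL, commit_worker, NULL);

            pthread_mutex_lock(&lock);
            while (state < COMMIT_START)
                    pthread_cond_wait(&cond, &lock);
            pthread_mutex_unlock(&lock);

            pthread_detach(worker);    /* the worker finishes on its own */
    }

    int main(void)
    {
            commit_async();
            printf("commit has started; caller is free to go\n");
            return 0;
    }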
    1561             : 
    1562             : 
    1563           0 : static void cleanup_transaction(struct btrfs_trans_handle *trans,
    1564             :                                 struct btrfs_root *root, int err)
    1565             : {
    1566           0 :         struct btrfs_transaction *cur_trans = trans->transaction;
    1567           0 :         DEFINE_WAIT(wait);
    1568             : 
    1569           0 :         WARN_ON(trans->use_count > 1);
    1570             : 
    1571           0 :         btrfs_abort_transaction(trans, root, err);
    1572             : 
    1573           0 :         spin_lock(&root->fs_info->trans_lock);
    1574             : 
    1575             :         /*
     1576             :          * If the transaction has already been removed from the list, it has
     1577             :          * been committed successfully, so the cleanup function must never be
     1578             :          * called for it.
    1579             :          */
    1580           0 :         BUG_ON(list_empty(&cur_trans->list));
    1581             : 
    1582             :         list_del_init(&cur_trans->list);
    1583           0 :         if (cur_trans == root->fs_info->running_transaction) {
    1584           0 :                 cur_trans->state = TRANS_STATE_COMMIT_DOING;
    1585           0 :                 spin_unlock(&root->fs_info->trans_lock);
    1586           0 :                 wait_event(cur_trans->writer_wait,
    1587             :                            atomic_read(&cur_trans->num_writers) == 1);
    1588             : 
    1589           0 :                 spin_lock(&root->fs_info->trans_lock);
    1590             :         }
    1591           0 :         spin_unlock(&root->fs_info->trans_lock);
    1592             : 
    1593           0 :         btrfs_cleanup_one_transaction(trans->transaction, root);
    1594             : 
    1595           0 :         spin_lock(&root->fs_info->trans_lock);
    1596           0 :         if (cur_trans == root->fs_info->running_transaction)
    1597           0 :                 root->fs_info->running_transaction = NULL;
    1598           0 :         spin_unlock(&root->fs_info->trans_lock);
    1599             : 
    1600           0 :         if (trans->type & __TRANS_FREEZABLE)
    1601           0 :                 sb_end_intwrite(root->fs_info->sb);
    1602           0 :         btrfs_put_transaction(cur_trans);
    1603           0 :         btrfs_put_transaction(cur_trans);
    1604             : 
    1605           0 :         trace_btrfs_transaction_commit(root);
    1606             : 
    1607           0 :         if (current->journal_info == trans)
    1608           0 :                 current->journal_info = NULL;
    1609           0 :         btrfs_scrub_cancel(root->fs_info);
    1610             : 
    1611           0 :         kmem_cache_free(btrfs_trans_handle_cachep, trans);
    1612           0 : }
    1613             : 
    1614        2098 : static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
    1615             : {
    1616        2098 :         if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT))
    1617           3 :                 return btrfs_start_delalloc_roots(fs_info, 1, -1);
    1618             :         return 0;
    1619             : }
    1620             : 
    1621        2098 : static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
    1622             : {
    1623        2098 :         if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT))
    1624           3 :                 btrfs_wait_ordered_roots(fs_info, -1);
    1625        2098 : }
    1626             : 
    1627        2190 : int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
    1628        2098 :                              struct btrfs_root *root)
    1629             : {
    1630        2190 :         struct btrfs_transaction *cur_trans = trans->transaction;
    1631             :         struct btrfs_transaction *prev_trans = NULL;
    1632             :         int ret;
    1633             : 
    1634             :         /* Stop the commit early if ->aborted is set */
    1635        2190 :         if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
    1636           0 :                 ret = cur_trans->aborted;
    1637           0 :                 btrfs_end_transaction(trans, root);
    1638           0 :                 return ret;
    1639             :         }
    1640             : 
     1641             :         /* make a pass through all the delayed refs we have so far;
     1642             :          * any running procs may add more while we are here
    1643             :          */
    1644        2190 :         ret = btrfs_run_delayed_refs(trans, root, 0);
    1645        2190 :         if (ret) {
    1646           0 :                 btrfs_end_transaction(trans, root);
    1647           0 :                 return ret;
    1648             :         }
    1649             : 
    1650        2190 :         btrfs_trans_release_metadata(trans, root);
    1651        2190 :         trans->block_rsv = NULL;
    1652        2190 :         if (trans->qgroup_reserved) {
    1653           0 :                 btrfs_qgroup_free(root, trans->qgroup_reserved);
    1654           0 :                 trans->qgroup_reserved = 0;
    1655             :         }
    1656             : 
    1657        2190 :         cur_trans = trans->transaction;
    1658             : 
    1659             :         /*
    1660             :          * set the flushing flag so procs in this transaction have to
    1661             :          * start sending their work down.
    1662             :          */
    1663        2190 :         cur_trans->delayed_refs.flushing = 1;
    1664        2190 :         smp_wmb();
    1665             : 
    1666        4378 :         if (!list_empty(&trans->new_bgs))
    1667           0 :                 btrfs_create_pending_block_groups(trans, root);
    1668             : 
    1669        2189 :         ret = btrfs_run_delayed_refs(trans, root, 0);
    1670        2190 :         if (ret) {
    1671           0 :                 btrfs_end_transaction(trans, root);
    1672           0 :                 return ret;
    1673             :         }
    1674             : 
    1675        2190 :         spin_lock(&root->fs_info->trans_lock);
    1676        2190 :         if (cur_trans->state >= TRANS_STATE_COMMIT_START) {
    1677          92 :                 spin_unlock(&root->fs_info->trans_lock);
    1678          92 :                 atomic_inc(&cur_trans->use_count);
    1679          92 :                 ret = btrfs_end_transaction(trans, root);
    1680             : 
    1681          92 :                 wait_for_commit(root, cur_trans);
    1682             : 
    1683          91 :                 btrfs_put_transaction(cur_trans);
    1684             : 
    1685          92 :                 return ret;
    1686             :         }
    1687             : 
    1688        2098 :         cur_trans->state = TRANS_STATE_COMMIT_START;
    1689        2098 :         wake_up(&root->fs_info->transaction_blocked_wait);
    1690             : 
    1691        2098 :         if (cur_trans->list.prev != &root->fs_info->trans_list) {
    1692          59 :                 prev_trans = list_entry(cur_trans->list.prev,
    1693             :                                         struct btrfs_transaction, list);
    1694          59 :                 if (prev_trans->state != TRANS_STATE_COMPLETED) {
    1695          59 :                         atomic_inc(&prev_trans->use_count);
    1696          59 :                         spin_unlock(&root->fs_info->trans_lock);
    1697             : 
    1698          59 :                         wait_for_commit(root, prev_trans);
    1699             : 
    1700          59 :                         btrfs_put_transaction(prev_trans);
    1701             :                 } else {
    1702             :                         spin_unlock(&root->fs_info->trans_lock);
    1703             :                 }
    1704             :         } else {
    1705             :                 spin_unlock(&root->fs_info->trans_lock);
    1706             :         }
    1707             : 
    1708        2098 :         extwriter_counter_dec(cur_trans, trans->type);
    1709             : 
    1710        2098 :         ret = btrfs_start_delalloc_flush(root->fs_info);
    1711        2098 :         if (ret)
    1712             :                 goto cleanup_transaction;
    1713             : 
    1714        2098 :         ret = btrfs_run_delayed_items(trans, root);
    1715        2098 :         if (ret)
    1716             :                 goto cleanup_transaction;
    1717             : 
    1718        2251 :         wait_event(cur_trans->writer_wait,
    1719             :                    extwriter_counter_read(cur_trans) == 0);
    1720             : 
     1721             :         /* some pending items might have been added after the previous flush. */
    1722        2098 :         ret = btrfs_run_delayed_items(trans, root);
    1723        2098 :         if (ret)
    1724             :                 goto cleanup_transaction;
    1725             : 
    1726        2098 :         btrfs_wait_delalloc_flush(root->fs_info);
    1727             : 
    1728        2098 :         btrfs_scrub_pause(root);
    1729             :         /*
    1730             :          * Ok now we need to make sure to block out any other joins while we
    1731             :          * commit the transaction.  We could have started a join before setting
     1732             :          * COMMIT_DOING, so make sure to wait for num_writers to drop to 1 again.
    1733             :          */
    1734        2098 :         spin_lock(&root->fs_info->trans_lock);
    1735        2098 :         cur_trans->state = TRANS_STATE_COMMIT_DOING;
    1736        2098 :         spin_unlock(&root->fs_info->trans_lock);
    1737        2131 :         wait_event(cur_trans->writer_wait,
    1738             :                    atomic_read(&cur_trans->num_writers) == 1);
    1739             : 
    1740             :         /* ->aborted might be set after the previous check, so check it */
    1741        2098 :         if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
    1742           0 :                 ret = cur_trans->aborted;
    1743           0 :                 goto scrub_continue;
    1744             :         }
    1745             :         /*
    1746             :          * the reloc mutex makes sure that we stop
    1747             :          * the balancing code from coming in and moving
    1748             :          * extents around in the middle of the commit
    1749             :          */
    1750        2098 :         mutex_lock(&root->fs_info->reloc_mutex);
    1751             : 
    1752             :         /*
    1753             :          * We needn't worry about the delayed items because we will
    1754             :          * deal with them in create_pending_snapshot(), which is the
    1755             :          * core function of the snapshot creation.
    1756             :          */
    1757        2098 :         ret = create_pending_snapshots(trans, root->fs_info);
    1758        2098 :         if (ret) {
    1759           0 :                 mutex_unlock(&root->fs_info->reloc_mutex);
    1760           0 :                 goto scrub_continue;
    1761             :         }
    1762             : 
    1763             :         /*
    1764             :          * We insert the dir indexes of the snapshots and update the inode
    1765             :          * of the snapshots' parents after the snapshot creation, so there
    1766             :          * are some delayed items which are not dealt with. Now deal with
    1767             :          * them.
    1768             :          *
    1769             :          * We needn't worry that this operation will corrupt the snapshots,
     1770             :          * because all the trees that were snapshotted will be forced to COW
     1771             :          * their nodes and leaves.
    1772             :          */
    1773        2098 :         ret = btrfs_run_delayed_items(trans, root);
    1774        2098 :         if (ret) {
    1775           0 :                 mutex_unlock(&root->fs_info->reloc_mutex);
    1776           0 :                 goto scrub_continue;
    1777             :         }
    1778             : 
    1779        2098 :         ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
    1780        2098 :         if (ret) {
    1781           0 :                 mutex_unlock(&root->fs_info->reloc_mutex);
    1782           0 :                 goto scrub_continue;
    1783             :         }
    1784             : 
    1785             :         /*
    1786             :          * make sure none of the code above managed to slip in a
    1787             :          * delayed item
    1788             :          */
    1789        2098 :         btrfs_assert_delayed_root_empty(root);
    1790             : 
    1791        2098 :         WARN_ON(cur_trans != trans->transaction);
    1792             : 
     1793             :         /* commit_cowonly_roots is responsible for getting the
    1794             :          * various roots consistent with each other.  Every pointer
    1795             :          * in the tree of tree roots has to point to the most up to date
    1796             :          * root for every subvolume and other tree.  So, we have to keep
    1797             :          * the tree logging code from jumping in and changing any
    1798             :          * of the trees.
    1799             :          *
    1800             :          * At this point in the commit, there can't be any tree-log
    1801             :          * writers, but a little lower down we drop the trans mutex
    1802             :          * and let new people in.  By holding the tree_log_mutex
    1803             :          * from now until after the super is written, we avoid races
    1804             :          * with the tree-log code.
    1805             :          */
    1806        2098 :         mutex_lock(&root->fs_info->tree_log_mutex);
    1807             : 
    1808        2098 :         ret = commit_fs_roots(trans, root);
    1809        2098 :         if (ret) {
    1810           0 :                 mutex_unlock(&root->fs_info->tree_log_mutex);
    1811           0 :                 mutex_unlock(&root->fs_info->reloc_mutex);
    1812           0 :                 goto scrub_continue;
    1813             :         }
    1814             : 
    1815             :         /*
    1816             :          * Since the transaction is done, we should set the inode map cache flag
     1817             :          * before any other transaction comes in.
    1818             :          */
    1819        2098 :         if (btrfs_test_opt(root, CHANGE_INODE_CACHE))
    1820           0 :                 btrfs_set_opt(root->fs_info->mount_opt, INODE_MAP_CACHE);
    1821             :         else
    1822        2098 :                 btrfs_clear_opt(root->fs_info->mount_opt, INODE_MAP_CACHE);
    1823             : 
     1824             :         /* commit_fs_roots gets rid of all the tree log roots; it is now
     1825             :          * safe to free the log root tree
    1826             :          */
    1827        2098 :         btrfs_free_log_root_tree(trans, root->fs_info);
    1828             : 
    1829        2098 :         ret = commit_cowonly_roots(trans, root);
    1830        2098 :         if (ret) {
    1831           0 :                 mutex_unlock(&root->fs_info->tree_log_mutex);
    1832           0 :                 mutex_unlock(&root->fs_info->reloc_mutex);
    1833           0 :                 goto scrub_continue;
    1834             :         }
    1835             : 
    1836             :         /*
    1837             :          * The tasks which save the space cache and inode cache may also
     1838             :          * update ->aborted, so check it again.
    1839             :          */
    1840        2098 :         if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
    1841           0 :                 ret = cur_trans->aborted;
    1842           0 :                 mutex_unlock(&root->fs_info->tree_log_mutex);
    1843           0 :                 mutex_unlock(&root->fs_info->reloc_mutex);
    1844           0 :                 goto scrub_continue;
    1845             :         }
    1846             : 
    1847        2098 :         btrfs_prepare_extent_commit(trans, root);
    1848             : 
    1849        2098 :         cur_trans = root->fs_info->running_transaction;
    1850             : 
    1851        2098 :         btrfs_set_root_node(&root->fs_info->tree_root->root_item,
    1852        2098 :                             root->fs_info->tree_root->node);
    1853        2098 :         list_add_tail(&root->fs_info->tree_root->dirty_list,
    1854             :                       &cur_trans->switch_commits);
    1855             : 
    1856        2098 :         btrfs_set_root_node(&root->fs_info->chunk_root->root_item,
    1857        2098 :                             root->fs_info->chunk_root->node);
    1858        2098 :         list_add_tail(&root->fs_info->chunk_root->dirty_list,
    1859             :                       &cur_trans->switch_commits);
    1860             : 
    1861        2098 :         switch_commit_roots(cur_trans, root->fs_info);
    1862             : 
    1863        2098 :         assert_qgroups_uptodate(trans);
    1864        2098 :         update_super_roots(root);
    1865             : 
    1866        2098 :         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
    1867        2098 :         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
    1868        2098 :         memcpy(root->fs_info->super_for_commit, root->fs_info->super_copy,
    1869             :                sizeof(*root->fs_info->super_copy));
    1870             : 
    1871        2098 :         spin_lock(&root->fs_info->trans_lock);
    1872        2098 :         cur_trans->state = TRANS_STATE_UNBLOCKED;
    1873        2098 :         root->fs_info->running_transaction = NULL;
    1874        2098 :         spin_unlock(&root->fs_info->trans_lock);
    1875        2098 :         mutex_unlock(&root->fs_info->reloc_mutex);
    1876             : 
    1877        2098 :         wake_up(&root->fs_info->transaction_wait);
    1878             : 
    1879        2098 :         ret = btrfs_write_and_wait_transaction(trans, root);
    1880        2098 :         if (ret) {
    1881           0 :                 btrfs_error(root->fs_info, ret,
    1882             :                             "Error while writing out transaction");
    1883           0 :                 mutex_unlock(&root->fs_info->tree_log_mutex);
    1884           0 :                 goto scrub_continue;
    1885             :         }
    1886             : 
    1887        2098 :         ret = write_ctree_super(trans, root, 0);
    1888        2098 :         if (ret) {
    1889           0 :                 mutex_unlock(&root->fs_info->tree_log_mutex);
    1890           0 :                 goto scrub_continue;
    1891             :         }
    1892             : 
    1893             :         /*
    1894             :          * the super is written, we can safely allow the tree-loggers
    1895             :          * to go about their business
    1896             :          */
    1897        2098 :         mutex_unlock(&root->fs_info->tree_log_mutex);
    1898             : 
    1899        2098 :         btrfs_finish_extent_commit(trans, root);
    1900             : 
    1901        2098 :         root->fs_info->last_trans_committed = cur_trans->transid;
    1902             :         /*
    1903             :          * We needn't acquire the lock here because there is no other task
    1904             :          * which can change it.
    1905             :          */
    1906        2098 :         cur_trans->state = TRANS_STATE_COMPLETED;
    1907        2098 :         wake_up(&cur_trans->commit_wait);
    1908             : 
    1909        2098 :         spin_lock(&root->fs_info->trans_lock);
    1910        2098 :         list_del_init(&cur_trans->list);
    1911        2098 :         spin_unlock(&root->fs_info->trans_lock);
    1912             : 
    1913        2098 :         btrfs_put_transaction(cur_trans);
    1914        2098 :         btrfs_put_transaction(cur_trans);
    1915             : 
    1916        2098 :         if (trans->type & __TRANS_FREEZABLE)
    1917        1081 :                 sb_end_intwrite(root->fs_info->sb);
    1918             : 
    1919        2098 :         trace_btrfs_transaction_commit(root);
    1920             : 
    1921        2098 :         btrfs_scrub_continue(root);
    1922             : 
    1923        2098 :         if (current->journal_info == trans)
    1924        2098 :                 current->journal_info = NULL;
    1925             : 
    1926        2098 :         kmem_cache_free(btrfs_trans_handle_cachep, trans);
    1927             : 
    1928        2098 :         if (current != root->fs_info->transaction_kthread)
    1929        2082 :                 btrfs_run_delayed_iputs(root);
    1930             : 
    1931        2098 :         return ret;
    1932             : 
    1933             : scrub_continue:
    1934           0 :         btrfs_scrub_continue(root);
    1935             : cleanup_transaction:
    1936           0 :         btrfs_trans_release_metadata(trans, root);
    1937           0 :         trans->block_rsv = NULL;
    1938           0 :         if (trans->qgroup_reserved) {
    1939           0 :                 btrfs_qgroup_free(root, trans->qgroup_reserved);
    1940           0 :                 trans->qgroup_reserved = 0;
    1941             :         }
    1942           0 :         btrfs_warn(root->fs_info, "Skipping commit of aborted transaction.");
    1943           0 :         if (current->journal_info == trans)
    1944           0 :                 current->journal_info = NULL;
    1945           0 :         cleanup_transaction(trans, root, ret);
    1946             : 
    1947           0 :         return ret;
    1948             : }
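The commit tail above runs whenever a btrfs transaction is committed, for example when the filesystem is synced. As a minimal user-space sketch (not part of transaction.c, and assuming a btrfs filesystem mounted at the example path /mnt/btrfs), one way to drive this path from outside the kernel is to force a sync of that mount with syncfs(2):

        /* Hedged sketch: force a filesystem sync on a btrfs mount, which
         * ends in btrfs_commit_transaction() inside the kernel.
         * "/mnt/btrfs" is only an example path. */
        #define _GNU_SOURCE
        #include <fcntl.h>
        #include <stdio.h>
        #include <unistd.h>

        int main(void)
        {
                int fd = open("/mnt/btrfs", O_RDONLY | O_DIRECTORY);

                if (fd < 0) {
                        perror("open");
                        return 1;
                }

                /* syncfs(2) flushes only the filesystem backing fd. */
                if (syncfs(fd) < 0) {
                        perror("syncfs");
                        close(fd);
                        return 1;
                }

                close(fd);
                return 0;
        }

Because syncfs(2) flushes only the filesystem backing the given descriptor, it is a cheaper way to trigger a commit on one mount than a global sync(2).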
    1949             : 
    1950             : /*
    1951             :  * Returns 0 if there are no more dead_roots at the time of the call and
    1952             :  * 1 if there are more to be processed; call it again in that case.  Errors
    1953             :  * from btrfs_drop_snapshot() are reported as 0 so the cleaner does not
    1954             :  * crash on an aborted transaction.
    1955             :  *
    1956             :  * A return of 1 means there are certainly more snapshots to delete, but if
    1957             :  * a new one appears during processing, this may still return 0.  That is
    1958             :  * fine: btrfs_commit_super will poke the cleaner thread a few seconds later.
    1959             :  */
    1960         295 : int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root)
    1961             : {
    1962             :         int ret;
    1963         295 :         struct btrfs_fs_info *fs_info = root->fs_info;
    1964             : 
    1965             :         spin_lock(&fs_info->trans_lock);
    1966         590 :         if (list_empty(&fs_info->dead_roots)) {
    1967             :                 spin_unlock(&fs_info->trans_lock);
    1968         284 :                 return 0;
    1969             :         }
    1970          11 :         root = list_first_entry(&fs_info->dead_roots,
    1971             :                         struct btrfs_root, root_list);
    1972          11 :         list_del_init(&root->root_list);
    1973             :         spin_unlock(&fs_info->trans_lock);
    1974             : 
    1975          11 :         pr_debug("BTRFS: cleaner removing %llu\n", root->objectid);
    1976             : 
    1977          11 :         btrfs_kill_all_delayed_nodes(root);
    1978             : 
    1979          22 :         if (btrfs_header_backref_rev(root->node) <
    1980             :                         BTRFS_MIXED_BACKREF_REV)
    1981           0 :                 ret = btrfs_drop_snapshot(root, NULL, 0, 0);
    1982             :         else
    1983          11 :                 ret = btrfs_drop_snapshot(root, NULL, 1, 0);
    1984             :         /*
    1985             :          * If we encounter a transaction abort during snapshot cleaning,
    1986             :          * we don't want to crash here, so report 0 instead of the error.
    1987             :          */
    1988          11 :         return (ret < 0) ? 0 : 1;
    1989             : }
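The 0/1 contract above is consumed by the cleaner kthread, which, roughly speaking, keeps invoking btrfs_clean_one_deleted_snapshot() (interleaved with its other work) until it reports nothing left to do. The following user-space analog is illustration only, with a hypothetical clean_one() standing in for the kernel function:

        #include <stdio.h>

        /* Hypothetical stand-in for btrfs_clean_one_deleted_snapshot():
         * returns 1 while work remains, 0 once the simulated dead-root
         * list is empty. */
        static int clean_one(int *dead_roots)
        {
                if (*dead_roots == 0)
                        return 0;
                (*dead_roots)--;
                return 1;
        }

        int main(void)
        {
                int dead_roots = 3;     /* pretend three deleted snapshots await cleanup */
                int passes = 0;

                /* Same shape as the kernel's cleaner loop: call again as
                 * long as the helper reports there may be more to do. */
                while (clean_one(&dead_roots))
                        passes++;

                printf("cleaner made %d passes\n", passes);
                return 0;
        }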

Generated by: LCOV version 1.10