LCOV - code coverage report
Current view: top level - fs/btrfs - tree-log.c (source / functions) Hit Total Coverage
Test: btrfstest.info Lines: 681 1650 41.3 %
Date: 2014-11-28 Functions: 30 58 51.7 %

          Line data    Source code
       1             : /*
       2             :  * Copyright (C) 2008 Oracle.  All rights reserved.
       3             :  *
       4             :  * This program is free software; you can redistribute it and/or
       5             :  * modify it under the terms of the GNU General Public
       6             :  * License v2 as published by the Free Software Foundation.
       7             :  *
       8             :  * This program is distributed in the hope that it will be useful,
       9             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      10             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      11             :  * General Public License for more details.
      12             :  *
      13             :  * You should have received a copy of the GNU General Public
      14             :  * License along with this program; if not, write to the
      15             :  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
      16             :  * Boston, MA 02111-1307, USA.
      17             :  */
      18             : 
      19             : #include <linux/sched.h>
      20             : #include <linux/slab.h>
      21             : #include <linux/blkdev.h>
      22             : #include <linux/list_sort.h>
      23             : #include "tree-log.h"
      24             : #include "disk-io.h"
      25             : #include "locking.h"
      26             : #include "print-tree.h"
      27             : #include "backref.h"
      28             : #include "hash.h"
      29             : 
      30             : /* magic values for the inode_only field in btrfs_log_inode:
      31             :  *
      32             :  * LOG_INODE_ALL means to log everything
      33             :  * LOG_INODE_EXISTS means to log just enough to recreate the inode
      34             :  * during log replay
      35             :  */
      36             : #define LOG_INODE_ALL 0
      37             : #define LOG_INODE_EXISTS 1
      38             : 
      39             : /*
      40             :  * directory trouble cases
      41             :  *
      42             :  * 1) on rename or unlink, if the inode being unlinked isn't in the fsync
      43             :  * log, we must force a full commit before doing an fsync of the directory
      44             :  * where the unlink was done.
      45             :  * ---> record transid of last unlink/rename per directory
      46             :  *
      47             :  * mkdir foo/some_dir
      48             :  * normal commit
      49             :  * rename foo/some_dir foo2/some_dir
      50             :  * mkdir foo/some_dir
      51             :  * fsync foo/some_dir/some_file
      52             :  *
      53             :  * The fsync above will unlink the original some_dir without recording
      54             :  * it in its new location (foo2).  After a crash, some_dir will be gone
      55             :  * unless the fsync of some_file forces a full commit
      56             :  *
      57             :  * 2) we must log any new names for any file or dir that is in the fsync
      58             :  * log. ---> check inode while renaming/linking.
      59             :  *
      60             :  * 2a) we must log any new names for any file or dir during rename
      61             :  * when the directory they are being removed from was logged.
      62             :  * ---> check inode and old parent dir during rename
      63             :  *
      64             :  *  2a is actually the more important variant.  Without the extra logging
      65             :  *  a crash might unlink the old name without recreating the new one
      66             :  *
      67             :  * 3) after a crash, we must go through any directories with a link count
      68             :  * of zero and redo the rm -rf
      69             :  *
      70             :  * mkdir f1/foo
      71             :  * normal commit
      72             :  * rm -rf f1/foo
      73             :  * fsync(f1)
      74             :  *
      75             :  * The directory f1/foo was fully removed from the FS, but fsync was never
      76             :  * called on f1/foo, only its parent dir.  After a crash the rm -rf must
      77             :  * be replayed.  This must be able to recurse down the entire
      78             :  * directory tree.  The inode link count fixup code takes care of the
      79             :  * ugly details.
      80             :  */
      81             : 
      82             : /*
      83             :  * stages for the tree walking.  The first
      84             :  * stage (0) is to only pin down the blocks we find
      85             :  * the second stage (1) is to make sure that all the inodes
      86             :  * we find in the log are created in the subvolume.
      87             :  *
      88             :  * The last stage is to deal with directories and links and extents
      89             :  * and all the other fun semantics
      90             :  */
      91             : #define LOG_WALK_PIN_ONLY 0
      92             : #define LOG_WALK_REPLAY_INODES 1
      93             : #define LOG_WALK_REPLAY_DIR_INDEX 2
      94             : #define LOG_WALK_REPLAY_ALL 3
      95             : 
      96             : static int btrfs_log_inode(struct btrfs_trans_handle *trans,
      97             :                            struct btrfs_root *root, struct inode *inode,
      98             :                            int inode_only,
      99             :                            const loff_t start,
     100             :                            const loff_t end);
     101             : static int link_to_fixup_dir(struct btrfs_trans_handle *trans,
     102             :                              struct btrfs_root *root,
     103             :                              struct btrfs_path *path, u64 objectid);
     104             : static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans,
     105             :                                        struct btrfs_root *root,
     106             :                                        struct btrfs_root *log,
     107             :                                        struct btrfs_path *path,
     108             :                                        u64 dirid, int del_all);
     109             : 
     110             : /*
     111             :  * tree logging is a special write ahead log used to make sure that
     112             :  * fsyncs and O_SYNCs can happen without doing full tree commits.
     113             :  *
     114             :  * Full tree commits are expensive because they require commonly
     115             :  * modified blocks to be recowed, creating many dirty pages in the
     116             :  * extent tree and a 4x-6x higher write load than ext3.
     117             :  *
     118             :  * Instead of doing a tree commit on every fsync, we use the
     119             :  * key ranges and transaction ids to find items for a given file or directory
     120             :  * that have changed in this transaction.  Those items are copied into
     121             :  * a special tree (one per subvolume root), that tree is written to disk
     122             :  * and then the fsync is considered complete.
     123             :  *
     124             :  * After a crash, items are copied out of the log-tree back into the
     125             :  * subvolume tree.  Any file data extents found are recorded in the extent
     126             :  * allocation tree, and the log-tree freed.
     127             :  *
     128             :  * The log tree is read three times, once to pin down all the extents it is
     129             :  * using in ram, once to create all the inodes logged in the tree
     130             :  * and once to do all the other items.
     131             :  */
     132             : 
     133             : /*
     134             :  * start a sub transaction and setup the log tree
     135             :  * this increments the log tree writer count to make the people
     136             :  * syncing the tree wait for us to finish
     137             :  */
     138        2925 : static int start_log_trans(struct btrfs_trans_handle *trans,
     139             :                            struct btrfs_root *root,
     140             :                            struct btrfs_log_ctx *ctx)
     141             : {
     142             :         int index;
     143             :         int ret;
     144             : 
     145        1575 :         mutex_lock(&root->log_mutex);
     146        1575 :         if (root->log_root) {
     147        2700 :                 if (btrfs_need_log_full_commit(root->fs_info, trans)) {
     148             :                         ret = -EAGAIN;
     149             :                         goto out;
     150             :                 }
     151        1350 :                 if (!root->log_start_pid) {
     152        1277 :                         root->log_start_pid = current->pid;
     153             :                         clear_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state);
     154          73 :                 } else if (root->log_start_pid != current->pid) {
     155             :                         set_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state);
     156             :                 }
     157             : 
     158        1350 :                 atomic_inc(&root->log_batch);
     159        1350 :                 atomic_inc(&root->log_writers);
     160        1350 :                 if (ctx) {
     161        1259 :                         index = root->log_transid % 2;
     162        1259 :                         list_add_tail(&ctx->list, &root->log_ctxs[index]);
     163        1259 :                         ctx->log_transid = root->log_transid;
     164             :                 }
     165        1350 :                 mutex_unlock(&root->log_mutex);
     166        1350 :                 return 0;
     167             :         }
     168             : 
     169             :         ret = 0;
     170         225 :         mutex_lock(&root->fs_info->tree_log_mutex);
     171         225 :         if (!root->fs_info->log_root_tree)
     172         225 :                 ret = btrfs_init_log_root_tree(trans, root->fs_info);
     173         225 :         mutex_unlock(&root->fs_info->tree_log_mutex);
     174         225 :         if (ret)
     175             :                 goto out;
     176             : 
     177         225 :         if (!root->log_root) {
     178         225 :                 ret = btrfs_add_log_tree(trans, root);
     179         225 :                 if (ret)
     180             :                         goto out;
     181             :         }
     182             :         clear_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state);
     183         225 :         root->log_start_pid = current->pid;
     184         225 :         atomic_inc(&root->log_batch);
     185         225 :         atomic_inc(&root->log_writers);
     186         225 :         if (ctx) {
     187         224 :                 index = root->log_transid % 2;
     188         224 :                 list_add_tail(&ctx->list, &root->log_ctxs[index]);
     189         224 :                 ctx->log_transid = root->log_transid;
     190             :         }
     191             : out:
     192         225 :         mutex_unlock(&root->log_mutex);
     193         225 :         return ret;
     194             : }
     195             : 
     196             : /*
     197             :  * returns 0 if there was a log transaction running and we were able
     198             :  * to join, or returns -ENOENT if there were no transactions
     199             :  * in progress
     200             :  */
     201         200 : static int join_running_log_trans(struct btrfs_root *root)
     202             : {
     203             :         int ret = -ENOENT;
     204             : 
     205         200 :         smp_mb();
     206         200 :         if (!root->log_root)
     207             :                 return -ENOENT;
     208             : 
     209         200 :         mutex_lock(&root->log_mutex);
     210         200 :         if (root->log_root) {
     211             :                 ret = 0;
     212         200 :                 atomic_inc(&root->log_writers);
     213             :         }
     214         200 :         mutex_unlock(&root->log_mutex);
     215         200 :         return ret;
     216             : }
     217             : 
     218             : /*
     219             :  * This either makes the current running log transaction wait
     220             :  * until you call btrfs_end_log_trans() or it makes any future
     221             :  * log transactions wait until you call btrfs_end_log_trans()
     222             :  */
     223        2311 : int btrfs_pin_log_trans(struct btrfs_root *root)
     224             : {
     225             :         int ret = -ENOENT;
     226             : 
     227        2311 :         mutex_lock(&root->log_mutex);
     228        2311 :         atomic_inc(&root->log_writers);
     229        2311 :         mutex_unlock(&root->log_mutex);
     230        2311 :         return ret;
     231             : }
     232             : 
     233             : /*
     234             :  * indicate we're done making changes to the log tree
     235             :  * and wake up anyone waiting to do a sync
     236             :  */
     237        4086 : void btrfs_end_log_trans(struct btrfs_root *root)
     238             : {
     239        8172 :         if (atomic_dec_and_test(&root->log_writers)) {
     240        3884 :                 smp_mb();
     241        3884 :                 if (waitqueue_active(&root->log_writer_wait))
     242           1 :                         wake_up(&root->log_writer_wait);
     243             :         }
     244        4086 : }
     245             : 
     246             : 
     247             : /*
     248             :  * the walk control struct is used to pass state down the chain when
     249             :  * processing the log tree.  The stage field tells us which part
     250             :  * of the log tree processing we are currently doing.  The others
     251             :  * are state fields used for that specific part
     252             :  */
     253             : struct walk_control {
     254             :         /* should we free the extent on disk when done?  This is used
     255             :          * at transaction commit time while freeing a log tree
     256             :          */
     257             :         int free;
     258             : 
     259             :         /* should we write out the extent buffer?  This is used
     260             :          * while flushing the log tree to disk during a sync
     261             :          */
     262             :         int write;
     263             : 
     264             :         /* should we wait for the extent buffer io to finish?  Also used
     265             :          * while flushing the log tree to disk for a sync
     266             :          */
     267             :         int wait;
     268             : 
     269             :         /* pin only walk, we record which extents on disk belong to the
     270             :          * log trees
     271             :          */
     272             :         int pin;
     273             : 
     274             :         /* what stage of the replay code we're currently in */
     275             :         int stage;
     276             : 
     277             :         /* the root we are currently replaying */
     278             :         struct btrfs_root *replay_dest;
     279             : 
     280             :         /* the trans handle for the current replay */
     281             :         struct btrfs_trans_handle *trans;
     282             : 
     283             :         /* the function that gets used to process blocks we find in the
     284             :          * tree.  Note the extent_buffer might not be up to date when it is
     285             :          * passed in, and it must be checked or read if you need the data
     286             :          * inside it
     287             :          */
     288             :         int (*process_func)(struct btrfs_root *log, struct extent_buffer *eb,
     289             :                             struct walk_control *wc, u64 gen);
     290             : };
     291             : 
     292             : /*
     293             :  * process_func used to pin down extents, write them or wait on them
     294             :  */
     295         505 : static int process_one_buffer(struct btrfs_root *log,
     296           0 :                               struct extent_buffer *eb,
     297             :                               struct walk_control *wc, u64 gen)
     298             : {
     299             :         int ret = 0;
     300             : 
     301             :         /*
     302             :          * If this fs is mixed then we need to be able to process the leaves to
     303             :          * pin down any logged extents, so we have to read the block.
     304             :          */
     305        1010 :         if (btrfs_fs_incompat(log->fs_info, MIXED_GROUPS)) {
     306         128 :                 ret = btrfs_read_buffer(eb, gen);
     307         128 :                 if (ret)
     308             :                         return ret;
     309             :         }
     310             : 
     311         505 :         if (wc->pin)
     312           0 :                 ret = btrfs_pin_extent_for_log_replay(log->fs_info->extent_root,
     313           0 :                                                       eb->start, eb->len);
     314             : 
     315         505 :         if (!ret && btrfs_buffer_uptodate(eb, gen, 0)) {
     316         505 :                 if (wc->pin && btrfs_header_level(eb) == 0)
     317           0 :                         ret = btrfs_exclude_logged_extents(log, eb);
     318         505 :                 if (wc->write)
     319           0 :                         btrfs_write_tree_block(eb);
     320         505 :                 if (wc->wait)
     321           0 :                         btrfs_wait_tree_block_writeback(eb);
     322             :         }
     323         505 :         return ret;
     324             : }
     325             : 
     326             : /*
     327             :  * Item overwrite used by replay and tree logging.  eb, slot and key all refer
     328             :  * to the src data we are copying out.
     329             :  *
     330             :  * root is the tree we are copying into, and path is a scratch
     331             :  * path for use in this function (it should be released on entry and
     332             :  * will be released on exit).
     333             :  *
     334             :  * If the key is already in the destination tree the existing item is
     335             :  * overwritten.  If the existing item isn't big enough, it is extended.
     336             :  * If it is too large, it is truncated.
     337             :  *
     338             :  * If the key isn't in the destination yet, a new item is inserted.
     339             :  */
     340           0 : static noinline int overwrite_item(struct btrfs_trans_handle *trans,
     341             :                                    struct btrfs_root *root,
     342             :                                    struct btrfs_path *path,
     343             :                                    struct extent_buffer *eb, int slot,
     344             :                                    struct btrfs_key *key)
     345             : {
     346             :         int ret;
     347             :         u32 item_size;
     348             :         u64 saved_i_size = 0;
     349             :         int save_old_i_size = 0;
     350             :         unsigned long src_ptr;
     351             :         unsigned long dst_ptr;
     352             :         int overwrite_root = 0;
     353           0 :         bool inode_item = key->type == BTRFS_INODE_ITEM_KEY;
     354             : 
     355           0 :         if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
     356             :                 overwrite_root = 1;
     357             : 
     358             :         item_size = btrfs_item_size_nr(eb, slot);
     359           0 :         src_ptr = btrfs_item_ptr_offset(eb, slot);
     360             : 
     361             :         /* look for the key in the destination tree */
     362           0 :         ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
     363           0 :         if (ret < 0)
     364             :                 return ret;
     365             : 
     366           0 :         if (ret == 0) {
     367             :                 char *src_copy;
     368             :                 char *dst_copy;
     369           0 :                 u32 dst_size = btrfs_item_size_nr(path->nodes[0],
     370             :                                                   path->slots[0]);
     371           0 :                 if (dst_size != item_size)
     372             :                         goto insert;
     373             : 
     374           0 :                 if (item_size == 0) {
     375           0 :                         btrfs_release_path(path);
     376           0 :                         return 0;
     377             :                 }
     378           0 :                 dst_copy = kmalloc(item_size, GFP_NOFS);
     379             :                 src_copy = kmalloc(item_size, GFP_NOFS);
     380           0 :                 if (!dst_copy || !src_copy) {
     381           0 :                         btrfs_release_path(path);
     382           0 :                         kfree(dst_copy);
     383           0 :                         kfree(src_copy);
     384           0 :                         return -ENOMEM;
     385             :                 }
     386             : 
     387           0 :                 read_extent_buffer(eb, src_copy, src_ptr, item_size);
     388             : 
     389           0 :                 dst_ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
     390           0 :                 read_extent_buffer(path->nodes[0], dst_copy, dst_ptr,
     391             :                                    item_size);
     392           0 :                 ret = memcmp(dst_copy, src_copy, item_size);
     393             : 
     394           0 :                 kfree(dst_copy);
     395           0 :                 kfree(src_copy);
     396             :                 /*
     397             :                  * they have the same contents, just return, this saves
     398             :                  * us from cowing blocks in the destination tree and doing
     399             :                  * extra writes that may not have been done by a previous
     400             :                  * sync
     401             :                  */
     402           0 :                 if (ret == 0) {
     403           0 :                         btrfs_release_path(path);
     404           0 :                         return 0;
     405             :                 }
     406             : 
     407             :                 /*
     408             :                  * We need to load the old nbytes into the inode so when we
     409             :                  * replay the extents we've logged we get the right nbytes.
     410             :                  */
     411           0 :                 if (inode_item) {
     412             :                         struct btrfs_inode_item *item;
     413             :                         u64 nbytes;
     414             :                         u32 mode;
     415             : 
     416           0 :                         item = btrfs_item_ptr(path->nodes[0], path->slots[0],
     417             :                                               struct btrfs_inode_item);
     418           0 :                         nbytes = btrfs_inode_nbytes(path->nodes[0], item);
     419           0 :                         item = btrfs_item_ptr(eb, slot,
     420             :                                               struct btrfs_inode_item);
     421             :                         btrfs_set_inode_nbytes(eb, item, nbytes);
     422             : 
     423             :                         /*
     424             :                          * If this is a directory we need to reset the i_size to
     425             :                          * 0 so that we can set it up properly when replaying
     426             :                          * the rest of the items in this log.
     427             :                          */
     428             :                         mode = btrfs_inode_mode(eb, item);
     429           0 :                         if (S_ISDIR(mode))
     430             :                                 btrfs_set_inode_size(eb, item, 0);
     431             :                 }
     432           0 :         } else if (inode_item) {
     433             :                 struct btrfs_inode_item *item;
     434             :                 u32 mode;
     435             : 
     436             :                 /*
     437             :                  * New inode, set nbytes to 0 so that the nbytes comes out
     438             :                  * properly when we replay the extents.
     439             :                  */
     440           0 :                 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
     441             :                 btrfs_set_inode_nbytes(eb, item, 0);
     442             : 
     443             :                 /*
     444             :                  * If this is a directory we need to reset the i_size to 0 so
     445             :                  * that we can set it up properly when replaying the rest of
     446             :                  * the items in this log.
     447             :                  */
     448             :                 mode = btrfs_inode_mode(eb, item);
     449           0 :                 if (S_ISDIR(mode))
     450             :                         btrfs_set_inode_size(eb, item, 0);
     451             :         }
     452             : insert:
     453           0 :         btrfs_release_path(path);
     454             :         /* try to insert the key into the destination tree */
     455             :         ret = btrfs_insert_empty_item(trans, root, path,
     456             :                                       key, item_size);
     457             : 
     458             :         /* make sure any existing item is the correct size */
     459           0 :         if (ret == -EEXIST) {
     460             :                 u32 found_size;
     461           0 :                 found_size = btrfs_item_size_nr(path->nodes[0],
     462             :                                                 path->slots[0]);
     463           0 :                 if (found_size > item_size)
     464           0 :                         btrfs_truncate_item(root, path, item_size, 1);
     465           0 :                 else if (found_size < item_size)
     466           0 :                         btrfs_extend_item(root, path,
     467             :                                           item_size - found_size);
     468           0 :         } else if (ret) {
     469             :                 return ret;
     470             :         }
     471           0 :         dst_ptr = btrfs_item_ptr_offset(path->nodes[0],
     472             :                                         path->slots[0]);
     473             : 
     474             :         /* don't overwrite an existing inode if the generation number
     475             :          * was logged as zero.  This is done when the tree logging code
     476             :          * is just logging an inode to make sure it exists after recovery.
     477             :          *
     478             :          * Also, don't overwrite i_size on directories during replay.
     479             :          * log replay inserts and removes directory items based on the
     480             :          * state of the tree found in the subvolume, and i_size is modified
     481             :          * as it goes
     482             :          */
     483           0 :         if (key->type == BTRFS_INODE_ITEM_KEY && ret == -EEXIST) {
     484             :                 struct btrfs_inode_item *src_item;
     485             :                 struct btrfs_inode_item *dst_item;
     486             : 
     487           0 :                 src_item = (struct btrfs_inode_item *)src_ptr;
     488           0 :                 dst_item = (struct btrfs_inode_item *)dst_ptr;
     489             : 
     490           0 :                 if (btrfs_inode_generation(eb, src_item) == 0)
     491             :                         goto no_copy;
     492             : 
     493           0 :                 if (overwrite_root &&
     494           0 :                     S_ISDIR(btrfs_inode_mode(eb, src_item)) &&
     495           0 :                     S_ISDIR(btrfs_inode_mode(path->nodes[0], dst_item))) {
     496             :                         save_old_i_size = 1;
     497           0 :                         saved_i_size = btrfs_inode_size(path->nodes[0],
     498             :                                                         dst_item);
     499             :                 }
     500             :         }
     501             : 
     502           0 :         copy_extent_buffer(path->nodes[0], eb, dst_ptr,
     503             :                            src_ptr, item_size);
     504             : 
     505           0 :         if (save_old_i_size) {
     506             :                 struct btrfs_inode_item *dst_item;
     507           0 :                 dst_item = (struct btrfs_inode_item *)dst_ptr;
     508           0 :                 btrfs_set_inode_size(path->nodes[0], dst_item, saved_i_size);
     509             :         }
     510             : 
     511             :         /* make sure the generation is filled in */
     512           0 :         if (key->type == BTRFS_INODE_ITEM_KEY) {
     513             :                 struct btrfs_inode_item *dst_item;
     514           0 :                 dst_item = (struct btrfs_inode_item *)dst_ptr;
     515           0 :                 if (btrfs_inode_generation(path->nodes[0], dst_item) == 0) {
     516           0 :                         btrfs_set_inode_generation(path->nodes[0], dst_item,
     517             :                                                    trans->transid);
     518             :                 }
     519             :         }
     520             : no_copy:
     521           0 :         btrfs_mark_buffer_dirty(path->nodes[0]);
     522           0 :         btrfs_release_path(path);
     523           0 :         return 0;
     524             : }
     525             : 
     526             : /*
     527             :  * Simple helper to read an inode off the disk from a given root.
     528             :  * This can only be called for subvolume roots and not for the log.
                     :  *
                     :  * The inode is looked up with btrfs_iget() using the key
                     :  * (objectid, BTRFS_INODE_ITEM_KEY, 0).
                     :  *
                     :  * Returns the inode, or NULL when btrfs_iget() returned an error
                     :  * pointer or is_bad_inode() rejected the result.  The underlying
                     :  * error code is not passed on, so callers only ever see NULL.  The
                     :  * callers visible nearby (replay_one_extent, drop_one_dir_item)
                     :  * release the returned inode with iput().
     529             :  */
     530           0 : static noinline struct inode *read_one_inode(struct btrfs_root *root,
     531             :                                              u64 objectid)
     532             : {
     533             :         struct btrfs_key key;
     534             :         struct inode *inode;
     535             : 
     536           0 :         key.objectid = objectid;
     537           0 :         key.type = BTRFS_INODE_ITEM_KEY;
     538           0 :         key.offset = 0;
     539           0 :         inode = btrfs_iget(root->fs_info->sb, &key, root, NULL);
     540           0 :         if (IS_ERR(inode)) {
     541             :                 inode = NULL; /* the error code is dropped here */
     542           0 :         } else if (is_bad_inode(inode)) {
     543           0 :                 iput(inode); /* give the bad inode back before returning NULL */
     544             :                 inode = NULL;
     545             :         }
     546           0 :         return inode;
     547             : }
     548             : 
     549             : /* Replays a single extent in 'eb' at 'slot' with 'key' into the
     550             :  * subvolume 'root'.  path is released on entry and should be released
     551             :  * on exit.
     552             :  *
     553             :  * Extents in the log tree have not been allocated out of the extent
     554             :  * tree yet.  So, this completes the allocation, taking a reference
     555             :  * as required if the extent already exists or creating a new extent
     556             :  * if it isn't in the extent allocation tree yet.
     557             :  *
     558             :  * The extent is inserted into the file, dropping any existing extents
     559             :  * from the file that overlap the new one.
                     :  *
                     :  * Regular and prealloc extents with a non-zero disk bytenr also get
                     :  * their checksums looked up in root->log_root and inserted into
                     :  * root->fs_info->csum_root.  Inline extents are written with
                     :  * overwrite_item().  On the success path the inode's byte count is
                     :  * increased by nbytes and btrfs_update_inode() is called.
                     :  *
                     :  * Returns 0 on success, when the extent type is none of REG,
                     :  * PREALLOC or INLINE (nothing is replayed), or when a REG/PREALLOC
                     :  * item byte-identical to the logged one is already in the file.
                     :  * Returns -EIO if the inode cannot be read, otherwise the error of
                     :  * whichever helper failed.
     560             :  */
     561           0 : static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
     562             :                                       struct btrfs_root *root,
     563             :                                       struct btrfs_path *path,
     564             :                                       struct extent_buffer *eb, int slot,
     565             :                                       struct btrfs_key *key)
     566             : {
     567             :         int found_type;
     568             :         u64 extent_end;
     569           0 :         u64 start = key->offset;
     570             :         u64 nbytes = 0;
     571             :         struct btrfs_file_extent_item *item;
     572             :         struct inode *inode = NULL;
     573             :         unsigned long size;
     574             :         int ret = 0;
     575             : 
     576           0 :         item = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
     577           0 :         found_type = btrfs_file_extent_type(eb, item);
     578             : 
     579           0 :         if (found_type == BTRFS_FILE_EXTENT_REG ||
     580             :             found_type == BTRFS_FILE_EXTENT_PREALLOC) {
     581             :                 nbytes = btrfs_file_extent_num_bytes(eb, item);
     582           0 :                 extent_end = start + nbytes;
     583             : 
     584             :                 /*
     585             :                  * We don't add to the inodes nbytes if we are prealloc or a
     586             :                  * hole.
                     :                  *
                     :                  * NOTE(review): the test below only looks for a zero disk
                     :                  * bytenr; a prealloc extent with a non-zero disk bytenr
                     :                  * keeps its nbytes.  Confirm which one is intended.
     587             :                  */
     588           0 :                 if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
     589             :                         nbytes = 0;
     590           0 :         } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
     591           0 :                 size = btrfs_file_extent_inline_len(eb, slot, item);
     592             :                 nbytes = btrfs_file_extent_ram_bytes(eb, item);
     593           0 :                 extent_end = ALIGN(start + size, root->sectorsize);
     594             :         } else {
     595             :                 ret = 0; /* any other extent type is skipped without error */
     596             :                 goto out;
     597             :         }
     598             : 
     599           0 :         inode = read_one_inode(root, key->objectid);
     600           0 :         if (!inode) {
     601             :                 ret = -EIO; /* read_one_inode() does not report why it failed */
     602             :                 goto out;
     603             :         }
     604             : 
     605             :         /*
     606             :          * first check to see if we already have this extent in the
     607             :          * file.  This must be done before the btrfs_drop_extents run
     608             :          * so we don't try to drop this extent.
     609             :          */
     610           0 :         ret = btrfs_lookup_file_extent(trans, root, path, btrfs_ino(inode),
     611             :                                        start, 0);
     612             :         /* presumably ret == 0 means an item exists at exactly 'start' - TODO confirm */
     613           0 :         if (ret == 0 &&
     614             :             (found_type == BTRFS_FILE_EXTENT_REG ||
     615             :              found_type == BTRFS_FILE_EXTENT_PREALLOC)) {
     616             :                 struct btrfs_file_extent_item cmp1;
     617             :                 struct btrfs_file_extent_item cmp2;
     618             :                 struct btrfs_file_extent_item *existing;
     619             :                 struct extent_buffer *leaf;
     620             : 
     621           0 :                 leaf = path->nodes[0];
     622           0 :                 existing = btrfs_item_ptr(leaf, path->slots[0],
     623             :                                           struct btrfs_file_extent_item);
     624             :                 /* copy both items out of their extent buffers so memcmp() can compare them */
     625           0 :                 read_extent_buffer(eb, &cmp1, (unsigned long)item,
     626             :                                    sizeof(cmp1));
     627           0 :                 read_extent_buffer(leaf, &cmp2, (unsigned long)existing,
     628             :                                    sizeof(cmp2));
     629             : 
     630             :                 /*
     631             :                  * we already have a pointer to this exact extent,
     632             :                  * we don't have to do anything (ret is 0 here, so this
                     :                  * returns success without touching the inode)
     633             :                  */
     634           0 :                 if (memcmp(&cmp1, &cmp2, sizeof(cmp1)) == 0) {
     635           0 :                         btrfs_release_path(path);
     636           0 :                         goto out;
     637             :                 }
     638             :         }
     639           0 :         btrfs_release_path(path);
     640             : 
     641             :         /* drop any overlapping extents */
     642           0 :         ret = btrfs_drop_extents(trans, root, inode, start, extent_end, 1);
     643           0 :         if (ret)
     644             :                 goto out;
     645             : 
     646           0 :         if (found_type == BTRFS_FILE_EXTENT_REG ||
     647             :             found_type == BTRFS_FILE_EXTENT_PREALLOC) {
     648             :                 u64 offset;
     649             :                 unsigned long dest_offset;
     650             :                 struct btrfs_key ins;
     651             : 
     652             :                 ret = btrfs_insert_empty_item(trans, root, path, key,
     653             :                                               sizeof(*item));
     654           0 :                 if (ret)
     655             :                         goto out;
     656           0 :                 dest_offset = btrfs_item_ptr_offset(path->nodes[0],
     657             :                                                     path->slots[0]);
     658           0 :                 copy_extent_buffer(path->nodes[0], eb, dest_offset,
     659             :                                 (unsigned long)item,  sizeof(*item));
     660             :                 /* ins is the extent key: (disk bytenr, EXTENT_ITEM, disk num bytes) */
     661           0 :                 ins.objectid = btrfs_file_extent_disk_bytenr(eb, item);
     662           0 :                 ins.offset = btrfs_file_extent_disk_num_bytes(eb, item);
     663           0 :                 ins.type = BTRFS_EXTENT_ITEM_KEY;
     664           0 :                 offset = key->offset - btrfs_file_extent_offset(eb, item);
     665             :                 /* a zero disk bytenr gets no extent reference and no checksums */
     666           0 :                 if (ins.objectid > 0) {
     667             :                         u64 csum_start;
     668             :                         u64 csum_end;
     669           0 :                         LIST_HEAD(ordered_sums);
     670             :                         /*
     671             :                          * is this extent already allocated in the extent
     672             :                          * allocation tree?  If so, just add a reference
                     :                          *
                     :                          * NOTE(review): every non-zero return takes the
                     :                          * allocation branch below; confirm that
                     :                          * btrfs_lookup_extent() cannot report a real
                     :                          * error (as opposed to "not found") here.
     673             :                          */
     674           0 :                         ret = btrfs_lookup_extent(root, ins.objectid,
     675             :                                                 ins.offset);
     676           0 :                         if (ret == 0) {
     677           0 :                                 ret = btrfs_inc_extent_ref(trans, root,
     678             :                                                 ins.objectid, ins.offset,
     679             :                                                 0, root->root_key.objectid,
     680             :                                                 key->objectid, offset, 0);
     681           0 :                                 if (ret)
     682             :                                         goto out;
     683             :                         } else {
     684             :                                 /*
     685             :                                  * insert the extent pointer in the extent
     686             :                                  * allocation tree
     687             :                                  */
     688           0 :                                 ret = btrfs_alloc_logged_file_extent(trans,
     689             :                                                 root, root->root_key.objectid,
     690             :                                                 key->objectid, offset, &ins);
     691           0 :                                 if (ret)
     692             :                                         goto out;
     693             :                         }
     694           0 :                         btrfs_release_path(path);
     695             :                         /*
                     :                          * Compressed extents use the checksums of the whole
                     :                          * on-disk extent; uncompressed ones only those of
                     :                          * the range this file extent item refers to.
                     :                          */
     696           0 :                         if (btrfs_file_extent_compression(eb, item)) {
     697           0 :                                 csum_start = ins.objectid;
     698           0 :                                 csum_end = csum_start + ins.offset;
     699             :                         } else {
     700           0 :                                 csum_start = ins.objectid +
     701             :                                         btrfs_file_extent_offset(eb, item);
     702           0 :                                 csum_end = csum_start +
     703             :                                         btrfs_file_extent_num_bytes(eb, item);
     704             :                         }
     705             :                         /* the sums come from the log root and go into the csum root */
     706           0 :                         ret = btrfs_lookup_csums_range(root->log_root,
     707             :                                                 csum_start, csum_end - 1,
     708             :                                                 &ordered_sums, 0);
     709           0 :                         if (ret)
     710             :                                 goto out; /* NOTE(review): ordered_sums is not drained on this path - confirm it is empty on failure */
     711           0 :                         while (!list_empty(&ordered_sums)) {
     712             :                                 struct btrfs_ordered_sum *sums;
     713           0 :                                 sums = list_entry(ordered_sums.next,
     714             :                                                 struct btrfs_ordered_sum,
     715             :                                                 list);
     716           0 :                                 if (!ret) /* after a failure: stop inserting, keep freeing */
     717           0 :                                         ret = btrfs_csum_file_blocks(trans,
     718           0 :                                                 root->fs_info->csum_root,
     719             :                                                 sums);
     720           0 :                                 list_del(&sums->list);
     721           0 :                                 kfree(sums);
     722             :                         }
     723           0 :                         if (ret)
     724             :                                 goto out;
     725             :                 } else {
     726           0 :                         btrfs_release_path(path);
     727             :                 }
     728           0 :         } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
     729             :                 /* inline extents are easy, we just overwrite them */
     730           0 :                 ret = overwrite_item(trans, root, path, eb, slot, key);
     731           0 :                 if (ret)
     732             :                         goto out;
     733             :         }
     734             :         /* account the replayed bytes (nbytes is 0 when the disk bytenr is 0) */
     735           0 :         inode_add_bytes(inode, nbytes);
     736           0 :         ret = btrfs_update_inode(trans, root, inode);
     737             : out:
     738           0 :         if (inode)
     739           0 :                 iput(inode);
     740           0 :         return ret;
     741             : }
     742             : 
     743             : /*
     744             :  * When cleaning up conflicts between the directory names in the
     745             :  * subvolume, directory names in the log and directory names in the
     746             :  * inode back references, we may have to unlink inodes from directories.
     747             :  *
     748             :  * This is a helper function to do the unlink of a specific directory
     749             :  * item.
                     :  *
                     :  * 'di' must point into path->nodes[0].  Its key and name are copied
                     :  * out and 'path' is released before the inode it names is read and
                     :  * unlinked from 'dir'; delayed items are run after a successful
                     :  * unlink.
                     :  *
                     :  * Returns 0 on success, -ENOMEM if the name copy cannot be allocated
                     :  * (in that case 'path' has not been released), -EIO if the inode
                     :  * cannot be read, or the error of the helper that failed.
     750             :  */
     751           0 : static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
     752             :                                       struct btrfs_root *root,
     753             :                                       struct btrfs_path *path,
     754             :                                       struct inode *dir,
     755             :                                       struct btrfs_dir_item *di)
     756             : {
     757             :         struct inode *inode;
     758             :         char *name;
     759             :         int name_len;
     760             :         struct extent_buffer *leaf;
     761             :         struct btrfs_key location;
     762             :         int ret;
     763             : 
     764           0 :         leaf = path->nodes[0];
     765             :         /* copy the key and the name out of the leaf before the path is released */
     766           0 :         btrfs_dir_item_key_to_cpu(leaf, di, &location);
     767           0 :         name_len = btrfs_dir_name_len(leaf, di);
     768           0 :         name = kmalloc(name_len, GFP_NOFS);
     769           0 :         if (!name)
     770             :                 return -ENOMEM;
     771             :         /* the name bytes are read from directly behind the dir item header */
     772           0 :         read_extent_buffer(leaf, name, (unsigned long)(di + 1), name_len);
     773           0 :         btrfs_release_path(path);
     774             : 
     775           0 :         inode = read_one_inode(root, location.objectid);
     776           0 :         if (!inode) {
     777             :                 ret = -EIO;
     778             :                 goto out;
     779             :         }
     780             :         /* link_to_fixup_dir() is defined outside this view; presumably it queues the inode for a later link count fixup - TODO confirm */
     781           0 :         ret = link_to_fixup_dir(trans, root, path, location.objectid);
     782           0 :         if (ret)
     783             :                 goto out;
     784             : 
     785           0 :         ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len);
     786           0 :         if (ret)
     787             :                 goto out;
     788             :         else
     789           0 :                 ret = btrfs_run_delayed_items(trans, root);
     790             : out:
     791           0 :         kfree(name);
     792           0 :         iput(inode); /* NOTE(review): inode is NULL when read_one_inode() failed; relies on iput() accepting NULL */
     793           0 :         return ret;
     794             : }
     795             : 
     796             : /*
     797             :  * Helper function to see if a given name and sequence number found
     798             :  * in an inode back reference are already in a directory and correctly
     799             :  * point to this inode.
                     :  *
                     :  * Two lookups are made in 'root' for directory 'dirid': the dir index
                     :  * item for ('index', 'name') and the dir item for 'name'.  Both must
                     :  * exist and both must have 'objectid' as their location objectid.
                     :  *
                     :  * Returns 1 if both entries match and 0 otherwise.  A lookup that
                     :  * returns NULL or an error pointer counts as "no match"; no error is
                     :  * reported.  'path' is released before returning.
     800             :  */
     801           0 : static noinline int inode_in_dir(struct btrfs_root *root,
     802             :                                  struct btrfs_path *path,
     803             :                                  u64 dirid, u64 objectid, u64 index,
     804             :                                  const char *name, int name_len)
     805             : {
     806             :         struct btrfs_dir_item *di;
     807             :         struct btrfs_key location;
     808             :         int match = 0;
     809             :         /* first check: the entry found through the directory index */
     810           0 :         di = btrfs_lookup_dir_index_item(NULL, root, path, dirid,
     811             :                                          index, name, name_len, 0);
     812           0 :         if (di && !IS_ERR(di)) {
     813           0 :                 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location);
     814           0 :                 if (location.objectid != objectid)
     815             :                         goto out;
     816             :         } else
     817             :                 goto out; /* missing entry or lookup error: no match */
     818           0 :         btrfs_release_path(path);
     819             :         /* second check: the entry found through the name alone */
     820           0 :         di = btrfs_lookup_dir_item(NULL, root, path, dirid, name, name_len, 0);
     821           0 :         if (di && !IS_ERR(di)) {
     822           0 :                 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location);
     823           0 :                 if (location.objectid != objectid)
     824             :                         goto out;
     825             :         } else
     826             :                 goto out; /* missing entry or lookup error: no match */
     827             :         match = 1;
     828             : out:
     829           0 :         btrfs_release_path(path);
     830           0 :         return match;
     831             : }
     832             : 
     833             : /*
     834             :  * Helper function to check a log tree for a named back reference in
     835             :  * an inode.  This is used to decide if a back reference that is
     836             :  * found in the subvolume conflicts with what we find in the log.
     837             :  *
     838             :  * Inode backreferences may have multiple refs in a single item,
     839             :  * during replay we process one reference at a time, and we don't
     840             :  * want to delete valid links to a file from the subvolume if that
     841             :  * link is also in the log.
                     :  *
                     :  * 'key' selects the back reference item in 'log'.  For
                     :  * BTRFS_INODE_EXTREF_KEY items the name is searched with
                     :  * btrfs_find_name_in_ext_backref(); this is the only place
                     :  * 'ref_objectid' is used.  For every other key type the item is
                     :  * walked as a sequence of struct btrfs_inode_ref entries.
                     :  *
                     :  * Returns 1 if 'name' is found and 0 if it is not, which includes a
                     :  * non-zero result from btrfs_search_slot().  Returns -ENOMEM if no
                     :  * path can be allocated.
                     :  * NOTE(review): the callers visible in __add_inode_ref test
                     :  * !backref_in_log(...), so -ENOMEM is treated like a match there.
     842             :  */
     843           0 : static noinline int backref_in_log(struct btrfs_root *log,
     844             :                                    struct btrfs_key *key,
     845             :                                    u64 ref_objectid,
     846             :                                    char *name, int namelen)
     847             : {
     848             :         struct btrfs_path *path;
     849             :         struct btrfs_inode_ref *ref;
     850             :         unsigned long ptr;
     851             :         unsigned long ptr_end;
     852             :         unsigned long name_ptr;
     853             :         int found_name_len;
     854             :         int item_size;
     855             :         int ret;
     856             :         int match = 0;
     857             : 
     858           0 :         path = btrfs_alloc_path();
     859           0 :         if (!path)
     860             :                 return -ENOMEM;
     861             : 
     862           0 :         ret = btrfs_search_slot(NULL, log, key, path, 0, 0);
     863           0 :         if (ret != 0)
     864             :                 goto out; /* any non-zero search result leaves match at 0 */
     865             : 
     866           0 :         ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
     867             : 
     868           0 :         if (key->type == BTRFS_INODE_EXTREF_KEY) {
     869           0 :                 if (btrfs_find_name_in_ext_backref(path, ref_objectid,
     870             :                                                    name, namelen, NULL))
     871             :                         match = 1;
     872             : 
     873             :                 goto out;
     874             :         }
     875             :         /* walk the item: each entry is a struct btrfs_inode_ref followed by its name bytes */
     876           0 :         item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
     877           0 :         ptr_end = ptr + item_size;
     878           0 :         while (ptr < ptr_end) {
     879           0 :                 ref = (struct btrfs_inode_ref *)ptr;
     880           0 :                 found_name_len = btrfs_inode_ref_name_len(path->nodes[0], ref);
     881           0 :                 if (found_name_len == namelen) {
     882           0 :                         name_ptr = (unsigned long)(ref + 1);
     883           0 :                         ret = memcmp_extent_buffer(path->nodes[0], name,
     884             :                                                    name_ptr, namelen);
     885           0 :                         if (ret == 0) {
     886             :                                 match = 1;
     887             :                                 goto out;
     888             :                         }
     889             :                 }
     890           0 :                 ptr = (unsigned long)(ref + 1) + found_name_len; /* step over header and name to the next entry */
     891             :         }
     892             : out:
     893           0 :         btrfs_free_path(path);
     894           0 :         return match;
     895             : }
     896             : 
     897           0 : static inline int __add_inode_ref(struct btrfs_trans_handle *trans,
     898             :                                   struct btrfs_root *root,
     899             :                                   struct btrfs_path *path,
     900             :                                   struct btrfs_root *log_root,
     901             :                                   struct inode *dir, struct inode *inode,
     902             :                                   struct extent_buffer *eb,
     903             :                                   u64 inode_objectid, u64 parent_objectid,
     904             :                                   u64 ref_index, char *name, int namelen,
     905             :                                   int *search_done)
     906             : {
     907             :         int ret;
     908             :         char *victim_name;
     909             :         int victim_name_len;
     910             :         struct extent_buffer *leaf;
     911             :         struct btrfs_dir_item *di;
     912             :         struct btrfs_key search_key;
     913             :         struct btrfs_inode_extref *extref;
     914             : 
     915             : again:
     916             :         /* Search old style refs */
     917           0 :         search_key.objectid = inode_objectid;
     918           0 :         search_key.type = BTRFS_INODE_REF_KEY;
     919           0 :         search_key.offset = parent_objectid;
     920           0 :         ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
     921           0 :         if (ret == 0) {
     922             :                 struct btrfs_inode_ref *victim_ref;
     923             :                 unsigned long ptr;
     924             :                 unsigned long ptr_end;
     925             : 
     926           0 :                 leaf = path->nodes[0];
     927             : 
     928             :                 /* are we trying to overwrite a back ref for the root directory
     929             :                  * if so, just jump out, we're done
     930             :                  */
     931           0 :                 if (search_key.objectid == search_key.offset)
     932             :                         return 1;
     933             : 
     934             :                 /* check all the names in this back reference to see
     935             :                  * if they are in the log.  if so, we allow them to stay
     936             :                  * otherwise they must be unlinked as a conflict
     937             :                  */
     938           0 :                 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
     939           0 :                 ptr_end = ptr + btrfs_item_size_nr(leaf, path->slots[0]);
     940           0 :                 while (ptr < ptr_end) {
     941           0 :                         victim_ref = (struct btrfs_inode_ref *)ptr;
     942           0 :                         victim_name_len = btrfs_inode_ref_name_len(leaf,
     943             :                                                                    victim_ref);
     944           0 :                         victim_name = kmalloc(victim_name_len, GFP_NOFS);
     945           0 :                         if (!victim_name)
     946             :                                 return -ENOMEM;
     947             : 
     948           0 :                         read_extent_buffer(leaf, victim_name,
     949           0 :                                            (unsigned long)(victim_ref + 1),
     950             :                                            victim_name_len);
     951             : 
     952           0 :                         if (!backref_in_log(log_root, &search_key,
     953             :                                             parent_objectid,
     954             :                                             victim_name,
     955             :                                             victim_name_len)) {
     956           0 :                                 inc_nlink(inode);
     957           0 :                                 btrfs_release_path(path);
     958             : 
     959           0 :                                 ret = btrfs_unlink_inode(trans, root, dir,
     960             :                                                          inode, victim_name,
     961             :                                                          victim_name_len);
     962           0 :                                 kfree(victim_name);
     963           0 :                                 if (ret)
     964             :                                         return ret;
     965           0 :                                 ret = btrfs_run_delayed_items(trans, root);
     966           0 :                                 if (ret)
     967             :                                         return ret;
     968           0 :                                 *search_done = 1;
     969             :                                 goto again;
     970             :                         }
     971           0 :                         kfree(victim_name);
     972             : 
     973           0 :                         ptr = (unsigned long)(victim_ref + 1) + victim_name_len;
     974             :                 }
     975             : 
     976             :                 /*
     977             :                  * NOTE: we have searched the root tree and checked the
     978             :                  * corresponding ref, so it does not need to be checked again.
     979             :                  */
     980           0 :                 *search_done = 1;
     981             :         }
     982           0 :         btrfs_release_path(path);
     983             : 
     984             :         /* Same search but for extended refs */
     985           0 :         extref = btrfs_lookup_inode_extref(NULL, root, path, name, namelen,
     986             :                                            inode_objectid, parent_objectid, 0,
     987             :                                            0);
     988           0 :         if (!IS_ERR_OR_NULL(extref)) {
     989             :                 u32 item_size;
     990             :                 u32 cur_offset = 0;
     991             :                 unsigned long base;
     992             :                 struct inode *victim_parent;
     993             : 
     994           0 :                 leaf = path->nodes[0];
     995             : 
     996           0 :                 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
     997           0 :                 base = btrfs_item_ptr_offset(leaf, path->slots[0]);
     998             : 
     999           0 :                 while (cur_offset < item_size) {
    1000           0 :                         extref = (struct btrfs_inode_extref *)base + cur_offset;
    1001             : 
    1002           0 :                         victim_name_len = btrfs_inode_extref_name_len(leaf, extref);
    1003             : 
    1004           0 :                         if (btrfs_inode_extref_parent(leaf, extref) != parent_objectid)
    1005             :                                 goto next;
    1006             : 
    1007           0 :                         victim_name = kmalloc(victim_name_len, GFP_NOFS);
    1008           0 :                         if (!victim_name)
    1009             :                                 return -ENOMEM;
    1010           0 :                         read_extent_buffer(leaf, victim_name, (unsigned long)&extref->name,
    1011             :                                            victim_name_len);
    1012             : 
    1013           0 :                         search_key.objectid = inode_objectid;
    1014           0 :                         search_key.type = BTRFS_INODE_EXTREF_KEY;
    1015           0 :                         search_key.offset = btrfs_extref_hash(parent_objectid,
    1016             :                                                               victim_name,
    1017             :                                                               victim_name_len);
    1018             :                         ret = 0;
    1019           0 :                         if (!backref_in_log(log_root, &search_key,
    1020             :                                             parent_objectid, victim_name,
    1021             :                                             victim_name_len)) {
    1022             :                                 ret = -ENOENT;
    1023           0 :                                 victim_parent = read_one_inode(root,
    1024             :                                                                parent_objectid);
    1025           0 :                                 if (victim_parent) {
    1026           0 :                                         inc_nlink(inode);
    1027           0 :                                         btrfs_release_path(path);
    1028             : 
    1029           0 :                                         ret = btrfs_unlink_inode(trans, root,
    1030             :                                                                  victim_parent,
    1031             :                                                                  inode,
    1032             :                                                                  victim_name,
    1033             :                                                                  victim_name_len);
    1034           0 :                                         if (!ret)
    1035           0 :                                                 ret = btrfs_run_delayed_items(
    1036             :                                                                   trans, root);
    1037             :                                 }
    1038           0 :                                 iput(victim_parent);
    1039           0 :                                 kfree(victim_name);
    1040           0 :                                 if (ret)
    1041             :                                         return ret;
    1042           0 :                                 *search_done = 1;
    1043             :                                 goto again;
    1044             :                         }
    1045           0 :                         kfree(victim_name);
    1046             :                         if (ret)
    1047             :                                 return ret;
    1048             : next:
    1049           0 :                         cur_offset += victim_name_len + sizeof(*extref);
    1050             :                 }
    1051           0 :                 *search_done = 1;
    1052             :         }
    1053           0 :         btrfs_release_path(path);
    1054             : 
    1055             :         /* look for a conflicting sequence number */
    1056           0 :         di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir),
    1057             :                                          ref_index, name, namelen, 0);
    1058           0 :         if (di && !IS_ERR(di)) {
    1059           0 :                 ret = drop_one_dir_item(trans, root, path, dir, di);
    1060           0 :                 if (ret)
    1061             :                         return ret;
    1062             :         }
    1063           0 :         btrfs_release_path(path);
    1064             : 
    1065             :         /* look for a conflicting name */
    1066           0 :         di = btrfs_lookup_dir_item(trans, root, path, btrfs_ino(dir),
    1067             :                                    name, namelen, 0);
    1068           0 :         if (di && !IS_ERR(di)) {
    1069           0 :                 ret = drop_one_dir_item(trans, root, path, dir, di);
    1070           0 :                 if (ret)
    1071             :                         return ret;
    1072             :         }
    1073           0 :         btrfs_release_path(path);
    1074             : 
    1075             :         return 0;
    1076             : }
    1077             : 
    1078           0 : static int extref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr,
    1079             :                              u32 *namelen, char **name, u64 *index,
    1080             :                              u64 *parent_objectid)
    1081             : {
    1082             :         struct btrfs_inode_extref *extref;
    1083             : 
    1084           0 :         extref = (struct btrfs_inode_extref *)ref_ptr;
    1085             : 
    1086           0 :         *namelen = btrfs_inode_extref_name_len(eb, extref);
    1087           0 :         *name = kmalloc(*namelen, GFP_NOFS);
    1088           0 :         if (*name == NULL)
    1089             :                 return -ENOMEM;
    1090             : 
    1091           0 :         read_extent_buffer(eb, *name, (unsigned long)&extref->name,
    1092           0 :                            *namelen);
    1093             : 
    1094           0 :         *index = btrfs_inode_extref_index(eb, extref);
    1095           0 :         if (parent_objectid)
    1096           0 :                 *parent_objectid = btrfs_inode_extref_parent(eb, extref);
    1097             : 
    1098             :         return 0;
    1099             : }
    1100             : 
    1101           0 : static int ref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr,
    1102             :                           u32 *namelen, char **name, u64 *index)
    1103             : {
    1104             :         struct btrfs_inode_ref *ref;
    1105             : 
    1106           0 :         ref = (struct btrfs_inode_ref *)ref_ptr;
    1107             : 
    1108           0 :         *namelen = btrfs_inode_ref_name_len(eb, ref);
    1109           0 :         *name = kmalloc(*namelen, GFP_NOFS);
    1110           0 :         if (*name == NULL)
    1111             :                 return -ENOMEM;
    1112             : 
    1113           0 :         read_extent_buffer(eb, *name, (unsigned long)(ref + 1), *namelen);
    1114             : 
    1115           0 :         *index = btrfs_inode_ref_index(eb, ref);
    1116             : 
    1117           0 :         return 0;
    1118             : }
    1119             : 
    1120             : /*
    1121             :  * replay one inode back reference item found in the log tree.
    1122             :  * eb, slot and key refer to the buffer and key found in the log tree.
    1123             :  * root is the destination we are replaying into, and path is for temp
    1124             :  * use by this function.  (it should be released on return).
    1125             :  */
    1126           0 : static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
    1127             :                                   struct btrfs_root *root,
    1128             :                                   struct btrfs_root *log,
    1129             :                                   struct btrfs_path *path,
    1130             :                                   struct extent_buffer *eb, int slot,
    1131             :                                   struct btrfs_key *key)
    1132             : {
    1133             :         struct inode *dir = NULL;
    1134             :         struct inode *inode = NULL;
    1135             :         unsigned long ref_ptr;
    1136             :         unsigned long ref_end;
    1137           0 :         char *name = NULL;
    1138             :         int namelen;
    1139             :         int ret;
    1140           0 :         int search_done = 0;
    1141             :         int log_ref_ver = 0;
    1142             :         u64 parent_objectid;
    1143             :         u64 inode_objectid;
    1144           0 :         u64 ref_index = 0;
    1145             :         int ref_struct_size;
    1146             : 
    1147           0 :         ref_ptr = btrfs_item_ptr_offset(eb, slot);
    1148           0 :         ref_end = ref_ptr + btrfs_item_size_nr(eb, slot);
    1149             : 
    1150           0 :         if (key->type == BTRFS_INODE_EXTREF_KEY) {
    1151             :                 struct btrfs_inode_extref *r;
    1152             : 
    1153             :                 ref_struct_size = sizeof(struct btrfs_inode_extref);
    1154             :                 log_ref_ver = 1;
    1155           0 :                 r = (struct btrfs_inode_extref *)ref_ptr;
    1156           0 :                 parent_objectid = btrfs_inode_extref_parent(eb, r);
    1157             :         } else {
    1158             :                 ref_struct_size = sizeof(struct btrfs_inode_ref);
    1159           0 :                 parent_objectid = key->offset;
    1160             :         }
    1161           0 :         inode_objectid = key->objectid;
    1162             : 
    1163             :         /*
    1164             :          * it is possible that we didn't log all the parent directories
    1165             :          * for a given inode.  If we don't find the dir, just don't
    1166             :          * copy the back ref in.  The link count fixup code will take
    1167             :          * care of the rest
    1168             :          */
    1169           0 :         dir = read_one_inode(root, parent_objectid);
    1170           0 :         if (!dir) {
    1171             :                 ret = -ENOENT;
    1172             :                 goto out;
    1173             :         }
    1174             : 
    1175           0 :         inode = read_one_inode(root, inode_objectid);
    1176           0 :         if (!inode) {
    1177             :                 ret = -EIO;
    1178             :                 goto out;
    1179             :         }
    1180             : 
    1181           0 :         while (ref_ptr < ref_end) {
    1182           0 :                 if (log_ref_ver) {
    1183           0 :                         ret = extref_get_fields(eb, ref_ptr, &namelen, &name,
    1184             :                                                 &ref_index, &parent_objectid);
    1185             :                         /*
    1186             :                          * parent object can change from one array
    1187             :                          * item to another.
    1188             :                          */
    1189           0 :                         if (!dir)
    1190           0 :                                 dir = read_one_inode(root, parent_objectid);
    1191           0 :                         if (!dir) {
    1192             :                                 ret = -ENOENT;
    1193             :                                 goto out;
    1194             :                         }
    1195             :                 } else {
    1196           0 :                         ret = ref_get_fields(eb, ref_ptr, &namelen, &name,
    1197             :                                              &ref_index);
    1198             :                 }
    1199           0 :                 if (ret)
    1200             :                         goto out;
    1201             : 
    1202             :                 /* if we already have a perfect match, we're done */
    1203           0 :                 if (!inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode),
    1204             :                                   ref_index, name, namelen)) {
    1205             :                         /*
    1206             :                          * look for a conflicting back reference in the
    1207             :                          * metadata. if we find one we have to unlink that name
    1208             :                          * of the file before we add our new link.  Later on, we
    1209             :                          * overwrite any existing back reference, and we don't
    1210             :                          * want to create dangling pointers in the directory.
    1211             :                          */
    1212             : 
    1213           0 :                         if (!search_done) {
    1214           0 :                                 ret = __add_inode_ref(trans, root, path, log,
    1215             :                                                       dir, inode, eb,
    1216             :                                                       inode_objectid,
    1217             :                                                       parent_objectid,
    1218             :                                                       ref_index, name, namelen,
    1219             :                                                       &search_done);
    1220           0 :                                 if (ret) {
    1221           0 :                                         if (ret == 1)
    1222             :                                                 ret = 0;
    1223             :                                         goto out;
    1224             :                                 }
    1225             :                         }
    1226             : 
    1227             :                         /* insert our name */
    1228           0 :                         ret = btrfs_add_link(trans, dir, inode, name, namelen,
    1229             :                                              0, ref_index);
    1230           0 :                         if (ret)
    1231             :                                 goto out;
    1232             : 
    1233           0 :                         btrfs_update_inode(trans, root, inode);
    1234             :                 }
    1235             : 
    1236           0 :                 ref_ptr = (unsigned long)(ref_ptr + ref_struct_size) + namelen;
    1237           0 :                 kfree(name);
    1238           0 :                 name = NULL;
    1239           0 :                 if (log_ref_ver) {
    1240           0 :                         iput(dir);
    1241             :                         dir = NULL;
    1242             :                 }
    1243             :         }
    1244             : 
    1245             :         /* finally write the back reference in the inode */
    1246           0 :         ret = overwrite_item(trans, root, path, eb, slot, key);
    1247             : out:
    1248           0 :         btrfs_release_path(path);
    1249           0 :         kfree(name);
    1250           0 :         iput(dir);
    1251           0 :         iput(inode);
    1252           0 :         return ret;
    1253             : }
    1254             : 
    1255           0 : static int insert_orphan_item(struct btrfs_trans_handle *trans,
    1256             :                               struct btrfs_root *root, u64 offset)
    1257             : {
    1258             :         int ret;
    1259           0 :         ret = btrfs_find_item(root, NULL, BTRFS_ORPHAN_OBJECTID,
    1260             :                         offset, BTRFS_ORPHAN_ITEM_KEY, NULL);
    1261           0 :         if (ret > 0)
    1262           0 :                 ret = btrfs_insert_orphan_item(trans, root, offset);
    1263           0 :         return ret;
    1264             : }
    1265             : 
    1266           0 : static int count_inode_extrefs(struct btrfs_root *root,
    1267             :                                struct inode *inode, struct btrfs_path *path)
    1268             : {
    1269             :         int ret = 0;
    1270             :         int name_len;
    1271             :         unsigned int nlink = 0;
    1272             :         u32 item_size;
    1273             :         u32 cur_offset = 0;
    1274             :         u64 inode_objectid = btrfs_ino(inode);
    1275           0 :         u64 offset = 0;
    1276             :         unsigned long ptr;
    1277             :         struct btrfs_inode_extref *extref;
    1278             :         struct extent_buffer *leaf;
    1279             : 
    1280             :         while (1) {
    1281           0 :                 ret = btrfs_find_one_extref(root, inode_objectid, offset, path,
    1282             :                                             &extref, &offset);
    1283           0 :                 if (ret)
    1284             :                         break;
    1285             : 
    1286           0 :                 leaf = path->nodes[0];
    1287           0 :                 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
    1288           0 :                 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
    1289             : 
    1290           0 :                 while (cur_offset < item_size) {
    1291           0 :                         extref = (struct btrfs_inode_extref *) (ptr + cur_offset);
    1292             :                         name_len = btrfs_inode_extref_name_len(leaf, extref);
    1293             : 
    1294           0 :                         nlink++;
    1295             : 
    1296           0 :                         cur_offset += name_len + sizeof(*extref);
    1297             :                 }
    1298             : 
    1299           0 :                 offset++;
    1300           0 :                 btrfs_release_path(path);
    1301           0 :         }
    1302           0 :         btrfs_release_path(path);
    1303             : 
    1304           0 :         if (ret < 0)
    1305             :                 return ret;
    1306           0 :         return nlink;
    1307             : }
    1308             : 
    1309           0 : static int count_inode_refs(struct btrfs_root *root,
    1310             :                                struct inode *inode, struct btrfs_path *path)
    1311             : {
    1312             :         int ret;
    1313             :         struct btrfs_key key;
    1314             :         unsigned int nlink = 0;
    1315             :         unsigned long ptr;
    1316             :         unsigned long ptr_end;
    1317             :         int name_len;
    1318             :         u64 ino = btrfs_ino(inode);
    1319             : 
    1320           0 :         key.objectid = ino;
    1321           0 :         key.type = BTRFS_INODE_REF_KEY;
    1322           0 :         key.offset = (u64)-1;
    1323             : 
    1324             :         while (1) {
    1325           0 :                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
    1326           0 :                 if (ret < 0)
    1327             :                         break;
    1328           0 :                 if (ret > 0) {
    1329           0 :                         if (path->slots[0] == 0)
    1330             :                                 break;
    1331           0 :                         path->slots[0]--;
    1332             :                 }
    1333             : process_slot:
    1334           0 :                 btrfs_item_key_to_cpu(path->nodes[0], &key,
    1335             :                                       path->slots[0]);
    1336           0 :                 if (key.objectid != ino ||
    1337           0 :                     key.type != BTRFS_INODE_REF_KEY)
    1338             :                         break;
    1339           0 :                 ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
    1340           0 :                 ptr_end = ptr + btrfs_item_size_nr(path->nodes[0],
    1341             :                                                    path->slots[0]);
    1342           0 :                 while (ptr < ptr_end) {
    1343             :                         struct btrfs_inode_ref *ref;
    1344             : 
    1345           0 :                         ref = (struct btrfs_inode_ref *)ptr;
    1346           0 :                         name_len = btrfs_inode_ref_name_len(path->nodes[0],
    1347             :                                                             ref);
    1348           0 :                         ptr = (unsigned long)(ref + 1) + name_len;
    1349           0 :                         nlink++;
    1350             :                 }
    1351             : 
    1352           0 :                 if (key.offset == 0)
    1353             :                         break;
    1354           0 :                 if (path->slots[0] > 0) {
    1355           0 :                         path->slots[0]--;
    1356           0 :                         goto process_slot;
    1357             :                 }
    1358           0 :                 key.offset--;
    1359           0 :                 btrfs_release_path(path);
    1360           0 :         }
    1361           0 :         btrfs_release_path(path);
    1362             : 
    1363           0 :         return nlink;
    1364             : }
    1365             : 
    1366             : /*
    1367             :  * There are a few corners where the link count of the file can't
    1368             :  * be properly maintained during replay.  So, instead of adding
    1369             :  * lots of complexity to the log code, we just scan the backrefs
    1370             :  * for any file that has been through replay.
    1371             :  *
    1372             :  * The scan will update the link count on the inode to reflect the
    1373             :  * number of back refs found.  If it goes down to zero, the iput
    1374             :  * will free the inode.
    1375             :  */
    1376           0 : static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
    1377             :                                            struct btrfs_root *root,
    1378             :                                            struct inode *inode)
    1379             : {
    1380             :         struct btrfs_path *path;
    1381             :         int ret;
    1382             :         u64 nlink = 0;
    1383             :         u64 ino = btrfs_ino(inode);
    1384             : 
    1385           0 :         path = btrfs_alloc_path();
    1386           0 :         if (!path)
    1387             :                 return -ENOMEM;
    1388             : 
    1389           0 :         ret = count_inode_refs(root, inode, path);
    1390           0 :         if (ret < 0)
    1391             :                 goto out;
    1392             : 
    1393           0 :         nlink = ret;
    1394             : 
    1395           0 :         ret = count_inode_extrefs(root, inode, path);
    1396           0 :         if (ret == -ENOENT)
    1397             :                 ret = 0;
    1398             : 
    1399           0 :         if (ret < 0)
    1400             :                 goto out;
    1401             : 
    1402           0 :         nlink += ret;
    1403             : 
    1404             :         ret = 0;
    1405             : 
    1406           0 :         if (nlink != inode->i_nlink) {
    1407           0 :                 set_nlink(inode, nlink);
    1408           0 :                 btrfs_update_inode(trans, root, inode);
    1409             :         }
    1410           0 :         BTRFS_I(inode)->index_cnt = (u64)-1;
    1411             : 
    1412           0 :         if (inode->i_nlink == 0) {
    1413           0 :                 if (S_ISDIR(inode->i_mode)) {
    1414           0 :                         ret = replay_dir_deletes(trans, root, NULL, path,
    1415             :                                                  ino, 1);
    1416           0 :                         if (ret)
    1417             :                                 goto out;
    1418             :                 }
    1419           0 :                 ret = insert_orphan_item(trans, root, ino);
    1420             :         }
    1421             : 
    1422             : out:
    1423           0 :         btrfs_free_path(path);
    1424           0 :         return ret;
    1425             : }
    1426             : 
    1427           0 : static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
    1428             :                                             struct btrfs_root *root,
    1429             :                                             struct btrfs_path *path)
    1430             : {
    1431             :         int ret;
    1432             :         struct btrfs_key key;
    1433             :         struct inode *inode;
    1434             : 
    1435           0 :         key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID;
    1436           0 :         key.type = BTRFS_ORPHAN_ITEM_KEY;
    1437           0 :         key.offset = (u64)-1;
    1438             :         while (1) {
    1439           0 :                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
    1440           0 :                 if (ret < 0)
    1441             :                         break;
    1442             : 
    1443           0 :                 if (ret == 1) {
    1444           0 :                         if (path->slots[0] == 0)
    1445             :                                 break;
    1446           0 :                         path->slots[0]--;
    1447             :                 }
    1448             : 
    1449           0 :                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
    1450           0 :                 if (key.objectid != BTRFS_TREE_LOG_FIXUP_OBJECTID ||
    1451           0 :                     key.type != BTRFS_ORPHAN_ITEM_KEY)
    1452             :                         break;
    1453             : 
    1454             :                 ret = btrfs_del_item(trans, root, path);
    1455           0 :                 if (ret)
    1456             :                         goto out;
    1457             : 
    1458           0 :                 btrfs_release_path(path);
    1459           0 :                 inode = read_one_inode(root, key.offset);
    1460           0 :                 if (!inode)
    1461             :                         return -EIO;
    1462             : 
    1463           0 :                 ret = fixup_inode_link_count(trans, root, inode);
    1464           0 :                 iput(inode);
    1465           0 :                 if (ret)
    1466             :                         goto out;
    1467             : 
    1468             :                 /*
    1469             :                  * fixup on a directory may create new entries,
    1470             :                  * make sure we always look for the highest possible
    1471             :                  * offset
    1472             :                  */
    1473           0 :                 key.offset = (u64)-1;
    1474           0 :         }
    1475             :         ret = 0;
    1476             : out:
    1477           0 :         btrfs_release_path(path);
    1478           0 :         return ret;
    1479             : }
    1480             : 
    1481             : 
    1482             : /*
    1483             :  * record a given inode in the fixup dir so we can check its link
    1484             :  * count when replay is done.  The link count is incremented here
    1485             :  * so the inode won't go away until we check it
    1486             :  */
    1487           0 : static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
    1488             :                                       struct btrfs_root *root,
    1489             :                                       struct btrfs_path *path,
    1490             :                                       u64 objectid)
    1491             : {
    1492             :         struct btrfs_key key;
    1493             :         int ret = 0;
    1494             :         struct inode *inode;
    1495             : 
    1496           0 :         inode = read_one_inode(root, objectid);
    1497           0 :         if (!inode)
    1498             :                 return -EIO;
    1499             : 
    1500           0 :         key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID;
    1501             :         btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY);
    1502           0 :         key.offset = objectid;
    1503             : 
    1504             :         ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
    1505             : 
    1506           0 :         btrfs_release_path(path);
    1507           0 :         if (ret == 0) {
    1508           0 :                 if (!inode->i_nlink)
    1509           0 :                         set_nlink(inode, 1);
    1510             :                 else
    1511           0 :                         inc_nlink(inode);
    1512           0 :                 ret = btrfs_update_inode(trans, root, inode);
    1513           0 :         } else if (ret == -EEXIST) {
    1514             :                 ret = 0;
    1515             :         } else {
    1516           0 :                 BUG(); /* Logic Error */
    1517             :         }
    1518           0 :         iput(inode);
    1519             : 
    1520           0 :         return ret;
    1521             : }
    1522             : 
    1523             : /*
    1524             :  * when replaying the log for a directory, we only insert names
    1525             :  * for inodes that actually exist.  This means an fsync on a directory
    1526             :  * does not implicitly fsync all the new files in it
    1527             :  */
    1528           0 : static noinline int insert_one_name(struct btrfs_trans_handle *trans,
    1529             :                                     struct btrfs_root *root,
    1530             :                                     struct btrfs_path *path,
    1531             :                                     u64 dirid, u64 index,
    1532             :                                     char *name, int name_len, u8 type,
    1533             :                                     struct btrfs_key *location)
    1534             : {
    1535             :         struct inode *inode;
    1536             :         struct inode *dir;
    1537             :         int ret;
    1538             : 
    1539           0 :         inode = read_one_inode(root, location->objectid);
    1540           0 :         if (!inode)
    1541             :                 return -ENOENT;
    1542             : 
    1543           0 :         dir = read_one_inode(root, dirid);
    1544           0 :         if (!dir) {
    1545           0 :                 iput(inode);
    1546             :                 return -EIO;
    1547             :         }
    1548             : 
    1549           0 :         ret = btrfs_add_link(trans, dir, inode, name, name_len, 1, index);
    1550             : 
    1551             :         /* FIXME, put inode into FIXUP list */
    1552             : 
    1553           0 :         iput(inode);
    1554           0 :         iput(dir);
    1555             :         return ret;
    1556             : }
    1557             : 
    1558             : /*
    1559             :  * Take a single entry in a log directory item and replay it into
    1560             :  * the subvolume.  Returns 0 on success or a negative errno.
    1561             :  *
    1562             :  * If a conflicting item exists in the subdirectory already,
    1563             :  * the inode it points to is unlinked and put into the link count
    1564             :  * fix up tree.
    1565             :  *
    1566             :  * If a name from the log points to a file or directory that does
    1567             :  * not exist in the FS, it is skipped.  fsyncs on directories
    1568             :  * do not force down inodes inside that directory, just changes to the
    1569             :  * names or unlinks in a directory.
    1570             :  */
    1571           0 : static noinline int replay_one_name(struct btrfs_trans_handle *trans,
    1572             :                                     struct btrfs_root *root,
    1573             :                                     struct btrfs_path *path,
    1574             :                                     struct extent_buffer *eb,
    1575             :                                     struct btrfs_dir_item *di,
    1576             :                                     struct btrfs_key *key)
    1577             : {
    1578             :         char *name;
    1579             :         int name_len;
    1580             :         struct btrfs_dir_item *dst_di;
    1581             :         struct btrfs_key found_key;
    1582             :         struct btrfs_key log_key;
    1583             :         struct inode *dir;
    1584             :         u8 log_type;
    1585             :         int exists;
    1586             :         int ret = 0;
    1587           0 :         bool update_size = (key->type == BTRFS_DIR_INDEX_KEY); /* only index keys adjust the dir i_size at out */
    1588             : 
    1589           0 :         dir = read_one_inode(root, key->objectid);
    1590           0 :         if (!dir)
    1591             :                 return -EIO;
    1592             : 
    1593           0 :         name_len = btrfs_dir_name_len(eb, di);
    1594           0 :         name = kmalloc(name_len, GFP_NOFS);
    1595           0 :         if (!name) {
    1596             :                 ret = -ENOMEM;
    1597             :                 goto out;
    1598             :         }
    1599             : 
    1600             :         log_type = btrfs_dir_type(eb, di);
    1601           0 :         read_extent_buffer(eb, name, (unsigned long)(di + 1), /* the name bytes follow the dir item header */
    1602             :                    name_len);
    1603             : 
    1604           0 :         btrfs_dir_item_key_to_cpu(eb, di, &log_key);
    1605           0 :         exists = btrfs_lookup_inode(trans, root, path, &log_key, 0); /* is the inode the logged entry points to present in root? */
    1606           0 :         if (exists == 0)
    1607             :                 exists = 1;
    1608             :         else
    1609             :                 exists = 0;
    1610           0 :         btrfs_release_path(path);
    1611             : 
    1612           0 :         if (key->type == BTRFS_DIR_ITEM_KEY) {
    1613           0 :                 dst_di = btrfs_lookup_dir_item(trans, root, path, key->objectid,
    1614             :                                        name, name_len, 1);
    1615           0 :         } else if (key->type == BTRFS_DIR_INDEX_KEY) {
    1616           0 :                 dst_di = btrfs_lookup_dir_index_item(trans, root, path,
    1617             :                                                      key->objectid,
    1618             :                                                      key->offset, name,
    1619             :                                                      name_len, 1);
    1620             :         } else {
    1621             :                 /* Corruption */
    1622             :                 ret = -EINVAL;
    1623             :                 goto out;
    1624             :         }
    1625           0 :         if (IS_ERR_OR_NULL(dst_di)) { /* NOTE(review): an ERR_PTR from the lookup is handled the same as not-found; ret stays 0 */
    1626             :                 /* we need a sequence number to insert, so we only
    1627             :                  * do inserts for the BTRFS_DIR_INDEX_KEY types
    1628             :                  */
    1629           0 :                 if (key->type != BTRFS_DIR_INDEX_KEY)
    1630             :                         goto out;
    1631             :                 goto insert;
    1632             :         }
    1633             : 
    1634           0 :         btrfs_dir_item_key_to_cpu(path->nodes[0], dst_di, &found_key);
    1635             :         /* the existing item matches the logged item */
    1636           0 :         if (found_key.objectid == log_key.objectid &&
    1637           0 :             found_key.type == log_key.type &&
    1638           0 :             found_key.offset == log_key.offset &&
    1639           0 :             btrfs_dir_type(path->nodes[0], dst_di) == log_type) {
    1640             :                 goto out;
    1641             :         }
    1642             : 
    1643             :         /*
    1644             :          * don't drop the conflicting directory entry if the inode
    1645             :          * for the new entry doesn't exist
    1646             :          */
    1647           0 :         if (!exists)
    1648             :                 goto out;
    1649             : 
    1650           0 :         ret = drop_one_dir_item(trans, root, path, dir, dst_di);
    1651           0 :         if (ret)
    1652             :                 goto out;
    1653             : 
    1654           0 :         if (key->type == BTRFS_DIR_INDEX_KEY)
    1655             :                 goto insert;
    1656             : out:
    1657           0 :         btrfs_release_path(path);
    1658           0 :         if (!ret && update_size) { /* index key that did not go through the insert path below, which clears update_size */
    1659           0 :                 btrfs_i_size_write(dir, dir->i_size + name_len * 2);
    1660           0 :                 ret = btrfs_update_inode(trans, root, dir);
    1661             :         }
    1662           0 :         kfree(name);
    1663           0 :         iput(dir);
    1664           0 :         return ret;
    1665             : 
    1666             : insert:
    1667           0 :         btrfs_release_path(path);
    1668           0 :         ret = insert_one_name(trans, root, path, key->objectid, key->offset,
    1669             :                               name, name_len, log_type, &log_key);
    1670           0 :         if (ret && ret != -ENOENT) /* -ENOENT is not treated as a failure: it is turned into 0 below */
    1671             :                 goto out;
    1672             :         update_size = false; /* NOTE(review): presumably insert_one_name accounts for the dir size itself - confirm */
    1673             :         ret = 0;
    1674           0 :         goto out;
    1675             : }
    1676             : 
    1677             : /*
    1678             :  * Walk every name packed into the directory item at 'slot' of 'eb' and
    1679             :  * replay each one with replay_one_name().  Only BTRFS_DIR_ITEM_KEY types
    1680             :  * will have more than one name in a directory item, but the same code is
    1681             :  * used for both key types.  Returns 0, -EIO if verify_dir_item() rejects
    1682             :  * an entry, or the first non-zero result of replay_one_name().  */
    1683           0 : static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans,
    1684             :                                         struct btrfs_root *root,
    1685             :                                         struct btrfs_path *path,
    1686             :                                         struct extent_buffer *eb, int slot,
    1687             :                                         struct btrfs_key *key)
    1688             : {
    1689             :         int ret;
    1690             :         u32 item_size = btrfs_item_size_nr(eb, slot);
    1691             :         struct btrfs_dir_item *di;
    1692             :         int name_len;
    1693             :         unsigned long ptr;
    1694             :         unsigned long ptr_end;
    1695             : 
    1696           0 :         ptr = btrfs_item_ptr_offset(eb, slot);
    1697           0 :         ptr_end = ptr + item_size;
    1698           0 :         while (ptr < ptr_end) {
    1699           0 :                 di = (struct btrfs_dir_item *)ptr; /* ptr is an offset into eb, not a dereferenceable address */
    1700           0 :                 if (verify_dir_item(root, eb, di))
    1701             :                         return -EIO;
    1702             :                 name_len = btrfs_dir_name_len(eb, di);
    1703           0 :                 ret = replay_one_name(trans, root, path, eb, di, key);
    1704           0 :                 if (ret)
    1705             :                         return ret;
    1706           0 :                 ptr = (unsigned long)(di + 1); /* step over the header, then over the name that follows it */
    1707           0 :                 ptr += name_len;
    1708             :         }
    1709             :         return 0;
    1710             : }
    1711             : 
    1712             : /*
    1713             :  * Directory replay has two parts: the standard directory items copied
    1714             :  * into the log from the subvolume, and range items created in the log
    1715             :  * while the subvolume was logged.  Range items say which parts of the
    1716             :  * key space the log is authoritative for: a subvolume directory key
    1717             :  * inside a logged range but missing from the log was deleted from the
    1718             :  * directory before the fsync and should be removed.
    1719             :  *
    1720             :  * Finds the range item containing *start_ret, else the one in the next
    1721             :  * slot.  Returns 0 with both outputs set, >0 if none, <0 on search error.
    1722             :  */
    1723           0 : static noinline int find_dir_range(struct btrfs_root *root,
    1724             :                                    struct btrfs_path *path,
    1725             :                                    u64 dirid, int key_type,
    1726             :                                    u64 *start_ret, u64 *end_ret)
    1727             : {
    1728             :         struct btrfs_key key;
    1729             :         u64 found_end;
    1730             :         struct btrfs_dir_log_item *item;
    1731             :         int ret;
    1732             :         int nritems;
    1733             : 
    1734           0 :         if (*start_ret == (u64)-1) /* (u64)-1 as the start means there is nothing left to look up */
    1735             :                 return 1;
    1736             : 
    1737           0 :         key.objectid = dirid;
    1738           0 :         key.type = key_type;
    1739           0 :         key.offset = *start_ret;
    1740             : 
    1741           0 :         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
    1742           0 :         if (ret < 0)
    1743             :                 goto out;
    1744           0 :         if (ret > 0) { /* no exact match: look at the item just before the search position */
    1745           0 :                 if (path->slots[0] == 0)
    1746             :                         goto out; /* no previous item in this leaf: the positive ret is returned */
    1747           0 :                 path->slots[0]--;
    1748             :         }
    1749           0 :         if (ret != 0)
    1750           0 :                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
    1751             : 
    1752           0 :         if (key.type != key_type || key.objectid != dirid) {
    1753             :                 ret = 1;
    1754             :                 goto next;
    1755             :         }
    1756           0 :         item = btrfs_item_ptr(path->nodes[0], path->slots[0],
    1757             :                               struct btrfs_dir_log_item);
    1758           0 :         found_end = btrfs_dir_log_end(path->nodes[0], item);
    1759             : 
    1760           0 :         if (*start_ret >= key.offset && *start_ret <= found_end) { /* this range item covers *start_ret */
    1761             :                 ret = 0;
    1762           0 :                 *start_ret = key.offset;
    1763           0 :                 *end_ret = found_end;
    1764           0 :                 goto out;
    1765             :         }
    1766             :         ret = 1;
    1767             : next:
    1768             :         /* check the next slot in the tree to see if it is a valid item */
    1769           0 :         nritems = btrfs_header_nritems(path->nodes[0]);
    1770           0 :         if (path->slots[0] >= nritems) {
    1771           0 :                 ret = btrfs_next_leaf(root, path);
    1772           0 :                 if (ret)
    1773             :                         goto out;
    1774             :         } else {
    1775           0 :                 path->slots[0]++; /* NOTE(review): can now equal nritems; it is not re-checked before the key read below - confirm */
    1776             :         }
    1777             : 
    1778           0 :         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
    1779             : 
    1780           0 :         if (key.type != key_type || key.objectid != dirid) {
    1781             :                 ret = 1;
    1782             :                 goto out;
    1783             :         }
    1784           0 :         item = btrfs_item_ptr(path->nodes[0], path->slots[0],
    1785             :                               struct btrfs_dir_log_item);
    1786           0 :         found_end = btrfs_dir_log_end(path->nodes[0], item);
    1787           0 :         *start_ret = key.offset;
    1788           0 :         *end_ret = found_end;
    1789             :         ret = 0;
    1790             : out:
    1791           0 :         btrfs_release_path(path);
    1792           0 :         return ret;
    1793             : }
    1794             : 
    1795             : /*
    1796             :  * Look up each name of the directory item at path's current slot in the
    1797             :  * log.  A name that is not in the log (or any name when log is NULL) is
    1798             :  * unlinked from dir with btrfs_unlink_inode().  Returns 0 or a negative errno.
    1799             :  */
    1800           0 : static noinline int check_item_in_log(struct btrfs_trans_handle *trans,
    1801             :                                       struct btrfs_root *root,
    1802             :                                       struct btrfs_root *log,
    1803             :                                       struct btrfs_path *path,
    1804             :                                       struct btrfs_path *log_path,
    1805             :                                       struct inode *dir,
    1806             :                                       struct btrfs_key *dir_key)
    1807             : {
    1808             :         int ret;
    1809             :         struct extent_buffer *eb;
    1810             :         int slot;
    1811             :         u32 item_size;
    1812             :         struct btrfs_dir_item *di;
    1813             :         struct btrfs_dir_item *log_di;
    1814             :         int name_len;
    1815             :         unsigned long ptr;
    1816             :         unsigned long ptr_end;
    1817             :         char *name;
    1818             :         struct inode *inode;
    1819             :         struct btrfs_key location;
    1820             : 
    1821             : again: /* re-entered after each unlink, once path has been re-searched */
    1822           0 :         eb = path->nodes[0];
    1823           0 :         slot = path->slots[0];
    1824             :         item_size = btrfs_item_size_nr(eb, slot);
    1825           0 :         ptr = btrfs_item_ptr_offset(eb, slot);
    1826           0 :         ptr_end = ptr + item_size;
    1827           0 :         while (ptr < ptr_end) {
    1828           0 :                 di = (struct btrfs_dir_item *)ptr;
    1829           0 :                 if (verify_dir_item(root, eb, di)) {
    1830             :                         ret = -EIO;
    1831             :                         goto out;
    1832             :                 }
    1833             : 
    1834           0 :                 name_len = btrfs_dir_name_len(eb, di);
    1835           0 :                 name = kmalloc(name_len, GFP_NOFS);
    1836           0 :                 if (!name) {
    1837             :                         ret = -ENOMEM;
    1838             :                         goto out;
    1839             :                 }
    1840           0 :                 read_extent_buffer(eb, name, (unsigned long)(di + 1),
    1841             :                                   name_len);
    1842             :                 log_di = NULL; /* stays NULL when log is NULL, which selects the unlink branch below */
    1843           0 :                 if (log && dir_key->type == BTRFS_DIR_ITEM_KEY) {
    1844           0 :                         log_di = btrfs_lookup_dir_item(trans, log, log_path,
    1845             :                                                        dir_key->objectid,
    1846             :                                                        name, name_len, 0);
    1847           0 :                 } else if (log && dir_key->type == BTRFS_DIR_INDEX_KEY) {
    1848           0 :                         log_di = btrfs_lookup_dir_index_item(trans, log,
    1849             :                                                      log_path,
    1850             :                                                      dir_key->objectid,
    1851             :                                                      dir_key->offset,
    1852             :                                                      name, name_len, 0);
    1853             :                 }
    1854           0 :                 if (!log_di || (IS_ERR(log_di) && PTR_ERR(log_di) == -ENOENT)) {
    1855           0 :                         btrfs_dir_item_key_to_cpu(eb, di, &location);
    1856           0 :                         btrfs_release_path(path); /* name and location were copied out above; eb and di are not used again before the restart */
    1857           0 :                         btrfs_release_path(log_path);
    1858           0 :                         inode = read_one_inode(root, location.objectid);
    1859           0 :                         if (!inode) {
    1860           0 :                                 kfree(name);
    1861           0 :                                 return -EIO; /* both paths were already released just above */
    1862             :                         }
    1863             : 
    1864           0 :                         ret = link_to_fixup_dir(trans, root,
    1865             :                                                 path, location.objectid);
    1866           0 :                         if (ret) {
    1867           0 :                                 kfree(name);
    1868           0 :                                 iput(inode);
    1869           0 :                                 goto out;
    1870             :                         }
    1871             : 
    1872           0 :                         inc_nlink(inode); /* NOTE(review): presumably offsets the link drop done by btrfs_unlink_inode - confirm */
    1873           0 :                         ret = btrfs_unlink_inode(trans, root, dir, inode,
    1874             :                                                  name, name_len);
    1875           0 :                         if (!ret)
    1876           0 :                                 ret = btrfs_run_delayed_items(trans, root);
    1877           0 :                         kfree(name);
    1878           0 :                         iput(inode);
    1879           0 :                         if (ret)
    1880             :                                 goto out;
    1881             : 
    1882             :                         /* there might still be more names under this key
    1883             :                          * check and repeat if required
    1884             :                          */
    1885           0 :                         ret = btrfs_search_slot(NULL, root, dir_key, path,
    1886             :                                                 0, 0);
    1887           0 :                         if (ret == 0) /* the exact key is still present: rescan its names from the start */
    1888             :                                 goto again;
    1889             :                         ret = 0; /* NOTE(review): a negative search result is also reset to 0 here */
    1890             :                         goto out;
    1891           0 :                 } else if (IS_ERR(log_di)) {
    1892           0 :                         kfree(name);
    1893           0 :                         return PTR_ERR(log_di); /* NOTE(review): returns without the path releases done at out */
    1894             :                 }
    1895           0 :                 btrfs_release_path(log_path);
    1896           0 :                 kfree(name);
    1897             : 
    1898             :                 ptr = (unsigned long)(di + 1);
    1899           0 :                 ptr += name_len;
    1900             :         }
    1901             :         ret = 0;
    1902             : out:
    1903           0 :         btrfs_release_path(path);
    1904           0 :         btrfs_release_path(log_path);
    1905           0 :         return ret;
    1906             : }
    1907             : 
    1908             : /*
    1909             :  * Deletion replay happens before we copy any new directory items
    1910             :  * out of the log or out of backreferences from inodes.  It
    1911             :  * scans the log to find ranges of keys that log is authoritative for,
    1912             :  * and then scans the directory to find items in those ranges that are
    1913             :  * not present in the log.
    1914             :  *
    1915             :  * Anything we don't find in the log is unlinked and removed from the
    1916             :  * directory.  Runs once for DIR_ITEM keys, then once for DIR_INDEX keys.
    1917             :  */
    1918           0 : static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans,
    1919             :                                        struct btrfs_root *root,
    1920             :                                        struct btrfs_root *log,
    1921             :                                        struct btrfs_path *path,
    1922             :                                        u64 dirid, int del_all)
    1923             : {
    1924             :         u64 range_start;
    1925             :         u64 range_end;
    1926             :         int key_type = BTRFS_DIR_LOG_ITEM_KEY;
    1927             :         int ret = 0;
    1928             :         struct btrfs_key dir_key;
    1929             :         struct btrfs_key found_key;
    1930             :         struct btrfs_path *log_path;
    1931             :         struct inode *dir;
    1932             : 
    1933           0 :         dir_key.objectid = dirid;
    1934           0 :         dir_key.type = BTRFS_DIR_ITEM_KEY;
    1935           0 :         log_path = btrfs_alloc_path();
    1936           0 :         if (!log_path)
    1937             :                 return -ENOMEM;
    1938             : 
    1939           0 :         dir = read_one_inode(root, dirid);
    1940             :         /* it isn't an error if the inode isn't there, that can happen
    1941             :          * because we replay the deletes before we copy in the inode item
    1942             :          * from the log
    1943             :          */
    1944           0 :         if (!dir) {
    1945           0 :                 btrfs_free_path(log_path);
    1946           0 :                 return 0;
    1947             :         }
    1948             : again:
    1949           0 :         range_start = 0;
    1950           0 :         range_end = 0;
    1951             :         while (1) {
    1952           0 :                 if (del_all) /* one range covering every offset; find_dir_range is not consulted */
    1953           0 :                         range_end = (u64)-1;
    1954             :                 else {
    1955           0 :                         ret = find_dir_range(log, path, dirid, key_type,
    1956             :                                              &range_start, &range_end);
    1957           0 :                         if (ret != 0)
    1958             :                                 break; /* NOTE(review): a negative ret also breaks here and is reset to 0 at next_type */
    1959             :                 }
    1960             : 
    1961           0 :                 dir_key.offset = range_start;
    1962             :                 while (1) {
    1963             :                         int nritems;
    1964           0 :                         ret = btrfs_search_slot(NULL, root, &dir_key, path,
    1965             :                                                 0, 0);
    1966           0 :                         if (ret < 0)
    1967             :                                 goto out;
    1968             : 
    1969           0 :                         nritems = btrfs_header_nritems(path->nodes[0]);
    1970           0 :                         if (path->slots[0] >= nritems) {
    1971           0 :                                 ret = btrfs_next_leaf(root, path);
    1972           0 :                                 if (ret)
    1973             :                                         break; /* NOTE(review): ret < 0 also lands here and is later overwritten, so the error is not returned */
    1974             :                         }
    1975           0 :                         btrfs_item_key_to_cpu(path->nodes[0], &found_key,
    1976             :                                               path->slots[0]);
    1977           0 :                         if (found_key.objectid != dirid ||
    1978           0 :                             found_key.type != dir_key.type)
    1979             :                                 goto next_type; /* past this directory's items of the current key type */
    1980             : 
    1981           0 :                         if (found_key.offset > range_end)
    1982             :                                 break;
    1983             : 
    1984           0 :                         ret = check_item_in_log(trans, root, log, path,
    1985             :                                                 log_path, dir,
    1986             :                                                 &found_key);
    1987           0 :                         if (ret)
    1988             :                                 goto out;
    1989           0 :                         if (found_key.offset == (u64)-1) /* the + 1 below would wrap to 0 */
    1990             :                                 break;
    1991           0 :                         dir_key.offset = found_key.offset + 1;
    1992           0 :                 }
    1993           0 :                 btrfs_release_path(path);
    1994           0 :                 if (range_end == (u64)-1) /* the + 1 below would wrap to 0 */
    1995             :                         break;
    1996           0 :                 range_start = range_end + 1;
    1997           0 :         }
    1998             : 
    1999             : next_type:
    2000             :         ret = 0;
    2001           0 :         if (key_type == BTRFS_DIR_LOG_ITEM_KEY) { /* first pass finished: repeat everything for the index keys */
    2002             :                 key_type = BTRFS_DIR_LOG_INDEX_KEY;
    2003           0 :                 dir_key.type = BTRFS_DIR_INDEX_KEY;
    2004           0 :                 btrfs_release_path(path);
    2005           0 :                 goto again;
    2006             :         }
    2007             : out:
    2008           0 :         btrfs_release_path(path);
    2009           0 :         btrfs_free_path(log_path);
    2010           0 :         iput(dir);
    2011           0 :         return ret;
    2012             : }
    2013             : 
    2014             : /*
    2015             :  * The process_func used to replay items from the log tree.  What it does
    2016             :  * depends on wc->stage.  LOG_WALK_REPLAY_INODES copies inode items into
    2017             :  * the subvolume; LOG_WALK_REPLAY_DIR_INDEX replays directory index items.
    2018             :  *
    2019             :  * From LOG_WALK_REPLAY_ALL on, the other item types are replayed into
    2020             :  * the subvolume.  The staged approach is slower, but gets rid of
    2021             :  * lots of complexity around inodes referencing other inodes that exist
    2022             :  * only in the log (references come from either directory items or inode
    2023             :  * back refs).
    2024             :  */
    2025           0 : static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
    2026             :                              struct walk_control *wc, u64 gen)
    2027             : {
    2028             :         int nritems;
    2029             :         struct btrfs_path *path;
    2030           0 :         struct btrfs_root *root = wc->replay_dest;
    2031             :         struct btrfs_key key;
    2032             :         int level;
    2033             :         int i;
    2034             :         int ret;
    2035             : 
    2036           0 :         ret = btrfs_read_buffer(eb, gen);
    2037           0 :         if (ret)
    2038             :                 return ret;
    2039             : 
    2040             :         level = btrfs_header_level(eb);
    2041             : 
    2042           0 :         if (level != 0) /* only level-0 buffers (leaves) are replayed */
    2043             :                 return 0;
    2044             : 
    2045           0 :         path = btrfs_alloc_path();
    2046           0 :         if (!path)
    2047             :                 return -ENOMEM;
    2048             : 
    2049           0 :         nritems = btrfs_header_nritems(eb);
    2050           0 :         for (i = 0; i < nritems; i++) { /* any failure breaks out, so ret carries the first error */
    2051           0 :                 btrfs_item_key_to_cpu(eb, &key, i);
    2052             : 
    2053             :                 /* inode keys are done during the first stage */
    2054           0 :                 if (key.type == BTRFS_INODE_ITEM_KEY &&
    2055           0 :                     wc->stage == LOG_WALK_REPLAY_INODES) {
    2056             :                         struct btrfs_inode_item *inode_item;
    2057             :                         u32 mode;
    2058             : 
    2059           0 :                         inode_item = btrfs_item_ptr(eb, i,
    2060             :                                             struct btrfs_inode_item);
    2061             :                         mode = btrfs_inode_mode(eb, inode_item);
    2062           0 :                         if (S_ISDIR(mode)) { /* directory deletions are replayed before the inode item is overwritten */
    2063           0 :                                 ret = replay_dir_deletes(wc->trans,
    2064             :                                          root, log, path, key.objectid, 0);
    2065           0 :                                 if (ret)
    2066             :                                         break;
    2067             :                         }
    2068           0 :                         ret = overwrite_item(wc->trans, root, path,
    2069             :                                              eb, i, &key);
    2070           0 :                         if (ret)
    2071             :                                 break;
    2072             : 
    2073             :                         /* for regular files, make sure the corresponding
    2074             :                          * orphan item exists. extents past the new EOF
    2075             :                          * will be truncated later by orphan cleanup.
    2076             :                          */
    2077           0 :                         if (S_ISREG(mode)) {
    2078           0 :                                 ret = insert_orphan_item(wc->trans, root,
    2079             :                                                          key.objectid);
    2080           0 :                                 if (ret)
    2081             :                                         break;
    2082             :                         }
    2083             : 
    2084           0 :                         ret = link_to_fixup_dir(wc->trans, root,
    2085             :                                                 path, key.objectid);
    2086           0 :                         if (ret)
    2087             :                                 break;
    2088             :                 }
    2089             : 
    2090           0 :                 if (key.type == BTRFS_DIR_INDEX_KEY &&
    2091           0 :                     wc->stage == LOG_WALK_REPLAY_DIR_INDEX) {
    2092           0 :                         ret = replay_one_dir_item(wc->trans, root, path,
    2093             :                                                   eb, i, &key);
    2094           0 :                         if (ret)
    2095             :                                 break;
    2096             :                 }
    2097             : 
    2098           0 :                 if (wc->stage < LOG_WALK_REPLAY_ALL) /* the item types below are not handled before LOG_WALK_REPLAY_ALL */
    2099           0 :                         continue;
    2100             : 
    2101             :                 /* these keys are simply copied */
    2102           0 :                 if (key.type == BTRFS_XATTR_ITEM_KEY) {
    2103           0 :                         ret = overwrite_item(wc->trans, root, path,
    2104             :                                              eb, i, &key);
    2105           0 :                         if (ret)
    2106             :                                 break;
    2107           0 :                 } else if (key.type == BTRFS_INODE_REF_KEY ||
    2108             :                            key.type == BTRFS_INODE_EXTREF_KEY) {
    2109           0 :                         ret = add_inode_ref(wc->trans, root, log, path,
    2110             :                                             eb, i, &key);
    2111           0 :                         if (ret && ret != -ENOENT) /* -ENOENT from add_inode_ref is tolerated and cleared below */
    2112             :                                 break;
    2113             :                         ret = 0;
    2114           0 :                 } else if (key.type == BTRFS_EXTENT_DATA_KEY) {
    2115           0 :                         ret = replay_one_extent(wc->trans, root, path,
    2116             :                                                 eb, i, &key);
    2117           0 :                         if (ret)
    2118             :                                 break;
    2119           0 :                 } else if (key.type == BTRFS_DIR_ITEM_KEY) {
    2120           0 :                         ret = replay_one_dir_item(wc->trans, root, path,
    2121             :                                                   eb, i, &key);
    2122           0 :                         if (ret)
    2123             :                                 break;
    2124             :                 }
    2125             :         }
    2126           0 :         btrfs_free_path(path);
    2127           0 :         return ret;
    2128             : }
    2129             : 
                       /*
                        * Walk down the log tree from path->nodes[*level], following the
                        * child pointers selected by path->slots[].
                        *
                        * A child reached from a level 1 node is handed to wc->process_func()
                        * and, when wc->free is set, is read, cleaned (only if trans is
                        * non-NULL) and has its extent released through
                        * btrfs_free_and_pin_reserved_extent().  Such children are never
                        * installed in the path.  Above level 1 the child is read, stored in
                        * path->nodes[] and *level is updated to the child's header level.
                        *
                        * On a normal exit the slot at the final *level is set to that node's
                        * item count.
                        *
                        * Returns 0 on success, -ENOMEM if a child extent buffer cannot be
                        * obtained, or the non-zero value returned by wc->process_func(),
                        * btrfs_read_buffer() or btrfs_free_and_pin_reserved_extent().
                        */
    2130         448 : static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
    2131          57 :                                    struct btrfs_root *root,
    2132             :                                    struct btrfs_path *path, int *level,
    2133             :                                    struct walk_control *wc)
    2134             : {
    2135             :         u64 root_owner;
    2136             :         u64 bytenr;
    2137             :         u64 ptr_gen;
    2138           0 :         struct extent_buffer *next;
    2139         124 :         struct extent_buffer *cur;
    2140          57 :         struct extent_buffer *parent;
    2141             :         u32 blocksize;
    2142             :         int ret = 0;
    2143             : 
    2144         448 :         WARN_ON(*level < 0);
    2145         448 :         WARN_ON(*level >= BTRFS_MAX_LEVEL);
    2146             : 
    2147         505 :         while (*level > 0) {
    2148          62 :                 WARN_ON(*level < 0);
    2149          62 :                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
    2150          62 :                 cur = path->nodes[*level];
    2151             : 
    2152          62 :                 WARN_ON(btrfs_header_level(cur) != *level);
    2153             : 
                                       /* every pointer in this node has already been visited */
    2154         124 :                 if (path->slots[*level] >=
    2155             :                     btrfs_header_nritems(cur))
    2156             :                         break;
    2157             : 
    2158             :                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
    2159          57 :                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
    2160          57 :                 blocksize = btrfs_level_size(root, *level - 1);
    2161             : 
                                       /* root_owner is only consulted by the WARN_ON further down */
    2162          57 :                 parent = path->nodes[*level];
    2163             :                 root_owner = btrfs_header_owner(parent);
    2164             : 
    2165          57 :                 next = btrfs_find_create_tree_block(root, bytenr, blocksize);
    2166          57 :                 if (!next)
    2167             :                         return -ENOMEM;
    2168             : 
                                       /*
                                        * Children of a level 1 node are processed right here and the
                                        * loop moves on to the next slot; *level is left unchanged.
                                        */
    2169          57 :                 if (*level == 1) {
    2170          57 :                         ret = wc->process_func(root, next, wc, ptr_gen);
    2171          57 :                         if (ret) {
    2172           0 :                                 free_extent_buffer(next);
    2173           0 :                                 return ret;
    2174             :                         }
    2175             : 
    2176          57 :                         path->slots[*level]++;
    2177          57 :                         if (wc->free) {
    2178          57 :                                 ret = btrfs_read_buffer(next, ptr_gen);
    2179          57 :                                 if (ret) {
    2180           0 :                                         free_extent_buffer(next);
    2181           0 :                                         return ret;
    2182             :                                 }
    2183             : 
    2184          57 :                                 if (trans) {
    2185          57 :                                         btrfs_tree_lock(next);
    2186             :                                         btrfs_set_lock_blocking(next);
    2187          57 :                                         clean_tree_block(trans, root, next);
    2188          57 :                                         btrfs_wait_tree_block_writeback(next);
    2189          57 :                                         btrfs_tree_unlock(next);
    2190             :                                 }
    2191             : 
    2192          57 :                                 WARN_ON(root_owner !=
    2193             :                                         BTRFS_TREE_LOG_OBJECTID);
    2194          57 :                                 ret = btrfs_free_and_pin_reserved_extent(root,
    2195             :                                                          bytenr, blocksize);
    2196          57 :                                 if (ret) {
    2197           0 :                                         free_extent_buffer(next);
    2198           0 :                                         return ret;
    2199             :                                 }
    2200             :                         }
    2201          57 :                         free_extent_buffer(next);
    2202          57 :                         continue;
    2203             :                 }
                                       /* child of a node above level 1: read it and step down into it */
    2204           0 :                 ret = btrfs_read_buffer(next, ptr_gen);
    2205           0 :                 if (ret) {
    2206           0 :                         free_extent_buffer(next);
    2207           0 :                         return ret;
    2208             :                 }
    2209             : 
    2210           0 :                 WARN_ON(*level <= 0);
                                       /* drop whatever buffer the path held at the child's level */
    2211           0 :                 if (path->nodes[*level-1])
    2212           0 :                         free_extent_buffer(path->nodes[*level-1]);
    2213           0 :                 path->nodes[*level-1] = next;
    2214           0 :                 *level = btrfs_header_level(next);
    2215           0 :                 path->slots[*level] = 0;
    2216           0 :                 cond_resched();
    2217             :         }
    2218         448 :         WARN_ON(*level < 0);
    2219         448 :         WARN_ON(*level >= BTRFS_MAX_LEVEL);
    2220             : 
                               /* point the slot past the last item of the node we stopped at */
    2221         896 :         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
    2222             : 
    2223         448 :         cond_resched();
    2224         448 :         return 0;
    2225             : }
    2226             : 
                       /*
                        * Climb back up the path, starting at *level, looking for a level
                        * whose current slot is followed by at least one more item.
                        *
                        * If such a level is found its slot is advanced, *level is set to it
                        * and 0 is returned.  Otherwise the node at the current level is
                        * finished: it is passed to wc->process_func() and, when wc->free is
                        * set, cleaned (only if trans is non-NULL) and its extent released
                        * with btrfs_free_and_pin_reserved_extent().  Its reference is then
                        * dropped, its path entry cleared and the search moves one level up.
                        *
                        * Returns 0 when a level with remaining items was found, 1 when the
                        * loop runs out of levels, or the non-zero value returned by
                        * wc->process_func() or btrfs_free_and_pin_reserved_extent().
                        */
    2227         448 : static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
    2228             :                                  struct btrfs_root *root,
    2229             :                                  struct btrfs_path *path, int *level,
    2230             :                                  struct walk_control *wc)
    2231             : {
    2232             :         u64 root_owner;
    2233             :         int i;
    2234             :         int slot;
    2235             :         int ret;
    2236             : 
                               /*
                                * i and *level are equal at the top of every iteration: *level is
                                * set to i + 1 at the end of the body, just before i++.
                                */
    2237        1344 :         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
    2238         448 :                 slot = path->slots[i];
    2239         896 :                 if (slot + 1 < btrfs_header_nritems(path->nodes[i])) {
                                               /* more items left at this level: resume from here */
    2240           0 :                         path->slots[i]++;
    2241           0 :                         *level = i;
    2242           0 :                         WARN_ON(*level == 0);
    2243             :                         return 0;
    2244             :                 } else {
    2245         448 :                         struct extent_buffer *parent;
                                               /* parent/root_owner feed only the WARN_ON below */
    2246         896 :                         if (path->nodes[*level] == root->node)
    2247             :                                 parent = path->nodes[*level];
    2248             :                         else
    2249           0 :                                 parent = path->nodes[*level + 1];
    2250             : 
    2251             :                         root_owner = btrfs_header_owner(parent);
    2252         896 :                         ret = wc->process_func(root, path->nodes[*level], wc,
    2253             :                                  btrfs_header_generation(path->nodes[*level]));
    2254         448 :                         if (ret)
    2255             :                                 return ret;
    2256             : 
    2257         448 :                         if (wc->free) {
    2258             :                                 struct extent_buffer *next;
    2259             : 
    2260         448 :                                 next = path->nodes[*level];
    2261             : 
    2262         448 :                                 if (trans) {
    2263         448 :                                         btrfs_tree_lock(next);
    2264             :                                         btrfs_set_lock_blocking(next);
    2265         448 :                                         clean_tree_block(trans, root, next);
    2266         448 :                                         btrfs_wait_tree_block_writeback(next);
    2267         448 :                                         btrfs_tree_unlock(next);
    2268             :                                 }
    2269             : 
    2270         448 :                                 WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID);
    2271         448 :                                 ret = btrfs_free_and_pin_reserved_extent(root,
    2272             :                                                 path->nodes[*level]->start,
    2273         448 :                                                 path->nodes[*level]->len);
    2274         448 :                                 if (ret)
    2275             :                                         return ret;
    2276             :                         }
                                               /* this node is done: release it and continue one level up */
    2277         448 :                         free_extent_buffer(path->nodes[*level]);
    2278         448 :                         path->nodes[*level] = NULL;
    2279         448 :                         *level = i + 1;
    2280             :                 }
    2281             :         }
    2282             :         return 1;
    2283             : }
    2284             : 
    2285             : /*
    2286             :  * Walk the whole log tree rooted at log->node, alternating
    2287             :  * walk_down_log_tree() and walk_up_log_tree() until one of them returns
    2288             :  * a positive value.
                        *
                        * If the root node is still present in the path afterwards it is
                        * handled here: wc->process_func() is called on it and, when wc->free
                        * is set, it is cleaned (only if trans is non-NULL) and its extent is
                        * released with btrfs_free_and_pin_reserved_extent().
                        *
                        * Returns 0 on success, -ENOMEM if the path cannot be allocated,
                        * otherwise the error reported by the walk helpers, by
                        * wc->process_func() or by btrfs_free_and_pin_reserved_extent().
    2289             :  */
    2290         448 : static int walk_log_tree(struct btrfs_trans_handle *trans,
    2291             :                          struct btrfs_root *log, struct walk_control *wc)
    2292             : {
    2293             :         int ret = 0;
    2294             :         int wret;
    2295             :         int level;
    2296             :         struct btrfs_path *path;
    2297             :         int orig_level;
    2298             : 
    2299         448 :         path = btrfs_alloc_path();
    2300         448 :         if (!path)
    2301             :                 return -ENOMEM;
    2302             : 
                               /* seed the path with the root node and take a reference on it */
    2303         896 :         level = btrfs_header_level(log->node);
    2304             :         orig_level = level;
    2305         448 :         path->nodes[level] = log->node;
    2306         448 :         extent_buffer_get(log->node);
    2307         448 :         path->slots[level] = 0;
    2308             : 
    2309             :         while (1) {
    2310         448 :                 wret = walk_down_log_tree(trans, log, path, &level, wc);
    2311         448 :                 if (wret > 0)
    2312             :                         break;
    2313         448 :                 if (wret < 0) {
    2314             :                         ret = wret;
    2315             :                         goto out;
    2316             :                 }
    2317             : 
    2318         448 :                 wret = walk_up_log_tree(trans, log, path, &level, wc);
    2319         448 :                 if (wret > 0)
    2320             :                         break;
    2321           0 :                 if (wret < 0) {
    2322             :                         ret = wret;
    2323             :                         goto out;
    2324             :                 }
    2325             :         }
    2326             : 
    2327             :         /* was the root node processed? if not, catch it here */
    2328         448 :         if (path->nodes[orig_level]) {
    2329           0 :                 ret = wc->process_func(log, path->nodes[orig_level], wc,
    2330             :                          btrfs_header_generation(path->nodes[orig_level]));
    2331           0 :                 if (ret)
    2332             :                         goto out;
    2333           0 :                 if (wc->free) {
    2334             :                         struct extent_buffer *next;
    2335             : 
    2336           0 :                         next = path->nodes[orig_level];
    2337             : 
    2338           0 :                         if (trans) {
    2339           0 :                                 btrfs_tree_lock(next);
    2340             :                                 btrfs_set_lock_blocking(next);
    2341           0 :                                 clean_tree_block(trans, log, next);
    2342           0 :                                 btrfs_wait_tree_block_writeback(next);
    2343           0 :                                 btrfs_tree_unlock(next);
    2344             :                         }
    2345             : 
    2346           0 :                         WARN_ON(log->root_key.objectid !=
    2347             :                                 BTRFS_TREE_LOG_OBJECTID);
    2348           0 :                         ret = btrfs_free_and_pin_reserved_extent(log, next->start,
    2349           0 :                                                          next->len);
    2350             :                         if (ret)
    2351             :                                 goto out;
    2352             :                 }
    2353             :         }
    2354             : 
    2355             : out:
    2356         448 :         btrfs_free_path(path);
    2357         448 :         return ret;
    2358             : }
    2359             : 
    2360             : /*
    2361             :  * helper function to update the item for a given subvolume's log root
    2362             :  * in the tree of log roots
                        *
                        * When log->log_transid is 1 the root item is inserted into
                        * fs_info->log_root_tree; for any other value the existing item is
                        * updated instead.
                        *
                        * Returns whatever btrfs_insert_root() or btrfs_update_root() returns.
    2363             :  */
    2364        1477 : static int update_log_root(struct btrfs_trans_handle *trans,
    2365             :                            struct btrfs_root *log)
    2366             : {
    2367             :         int ret;
    2368             : 
    2369        1477 :         if (log->log_transid == 1) {
    2370             :                 /* insert root item on the first sync */
    2371         223 :                 ret = btrfs_insert_root(trans, log->fs_info->log_root_tree,
    2372             :                                 &log->root_key, &log->root_item);
    2373             :         } else {
    2374        1254 :                 ret = btrfs_update_root(trans, log->fs_info->log_root_tree,
    2375             :                                 &log->root_key, &log->root_item);
    2376             :         }
    2377        1477 :         return ret;
    2378             : }
    2379             : 
                       /*
                        * Sleep until the log commit identified by transid is no longer
                        * pending on this root.
                        *
                        * The wait ends once root->log_transid_committed has reached transid
                        * or root->log_commit[transid % 2] reads zero.  root->log_mutex is
                        * released around the sleep and re-taken before each re-check and
                        * before returning, so the caller is expected to hold it on entry.
                        * The trans argument is not used.
                        */
    2380          18 : static void wait_log_commit(struct btrfs_trans_handle *trans,
    2381             :                             struct btrfs_root *root, int transid)
    2382             : {
    2383          36 :         DEFINE_WAIT(wait);
    2384          18 :         int index = transid % 2;
    2385             : 
    2386             :         /*
    2387             :          * we only allow two pending log transactions at a time,
    2388             :          * so we know that if ours is more than 2 older than the
    2389             :          * current transaction, we're done
    2390             :          */
    2391             :         do {
    2392          18 :                 prepare_to_wait(&root->log_commit_wait[index],
    2393             :                                 &wait, TASK_UNINTERRUPTIBLE);
    2394          18 :                 mutex_unlock(&root->log_mutex);
    2395             : 
                                       /* re-check after queueing so a wakeup in between is not lost */
    2396          36 :                 if (root->log_transid_committed < transid &&
    2397          18 :                     atomic_read(&root->log_commit[index]))
    2398          18 :                         schedule();
    2399             : 
    2400          18 :                 finish_wait(&root->log_commit_wait[index], &wait);
    2401          18 :                 mutex_lock(&root->log_mutex);
    2402          18 :         } while (root->log_transid_committed < transid &&
    2403          18 :                  atomic_read(&root->log_commit[index]));
    2404          18 : }
    2405             : 
                       /*
                        * Wait until root->log_writers drops to zero.
                        *
                        * root->log_mutex is released around the sleep and re-taken before
                        * the counter is checked again, so the caller is expected to hold it
                        * on entry; it is held again on return.  The trans argument is not
                        * used.
                        */
    2406        2957 : static void wait_for_writer(struct btrfs_trans_handle *trans,
    2407             :                             struct btrfs_root *root)
    2408             : {
    2409        5914 :         DEFINE_WAIT(wait);
    2410             : 
    2411        2958 :         while (atomic_read(&root->log_writers)) {
    2412           1 :                 prepare_to_wait(&root->log_writer_wait,
    2413             :                                 &wait, TASK_UNINTERRUPTIBLE);
    2414           1 :                 mutex_unlock(&root->log_mutex);
    2415           1 :                 if (atomic_read(&root->log_writers))
    2416           1 :                         schedule();
                                       /*
                                        * Leave the wait queue (restoring TASK_RUNNING) before taking
                                        * the mutex: if schedule() was skipped above the task state
                                        * is still TASK_UNINTERRUPTIBLE, and mutex_lock() may sleep.
                                        * This is the same order wait_log_commit() uses.
                                        */
    2417           1 :                 finish_wait(&root->log_writer_wait, &wait);
    2418           1 :                 mutex_lock(&root->log_mutex);
    2419             :         }
    2420        2957 : }
    2421             : 
                       /*
                        * Unlink ctx from the list it is queued on, holding root->log_mutex
                        * for the duration.  A NULL ctx is ignored.
                        */
    2422           0 : static inline void btrfs_remove_log_ctx(struct btrfs_root *root,
    2423             :                                         struct btrfs_log_ctx *ctx)
    2424             : {
    2425           0 :         if (!ctx)
    2426           0 :                 return;
    2427             : 
    2428           0 :         mutex_lock(&root->log_mutex);
    2429           0 :         list_del_init(&ctx->list);
    2430           0 :         mutex_unlock(&root->log_mutex);
    2431             : }
    2432             : 
    2433             : /*
    2434             :  * Invoked in log mutex context, or be sure there is no other task which
    2435             :  * can access the list.
                        *
                        * Resets root->log_ctxs[index] to an empty list.  When error is
                        * non-zero it is first stored in the log_ret field of every context
                        * currently on that list.  Only the list head is reinitialised; the
                        * individual contexts are not unlinked one by one.
    2436             :  */
    2437             : static inline void btrfs_remove_all_log_ctxs(struct btrfs_root *root,
    2438             :                                              int index, int error)
    2439             : {
    2440             :         struct btrfs_log_ctx *ctx;
    2441             : 
                               /* nothing to report: just empty the list */
    2442        2956 :         if (!error) {
    2443        2954 :                 INIT_LIST_HEAD(&root->log_ctxs[index]);
    2444             :                 return;
    2445             :         }
    2446             : 
    2447           4 :         list_for_each_entry(ctx, &root->log_ctxs[index], list)
    2448           2 :                 ctx->log_ret = error;
    2449             : 
    2450             :         INIT_LIST_HEAD(&root->log_ctxs[index]);
    2451             : }
    2452             : 
    2453             : /*
    2454             :  * btrfs_sync_log sends a given tree log down to the disk and
    2455             :  * updates the super blocks to record it.  When this call is done,
    2456             :  * you know that any inodes previously logged are safely on disk only
    2457             :  * if it returns 0.
    2458             :  *
    2459             :  * Any other return value means you need to call btrfs_commit_transaction.
    2460             :  * Some of the edge cases for fsyncing directories that have had unlinks
    2461             :  * or renames done in the past mean that sometimes the only safe
    2462             :  * fsync is to commit the whole FS.  When btrfs_sync_log returns -EAGAIN,
    2463             :  * that has happened.
    2464             :  */
    2465        4439 : int btrfs_sync_log(struct btrfs_trans_handle *trans,
    2466             :                    struct btrfs_root *root, struct btrfs_log_ctx *ctx)
    2467             : {
    2468             :         int index1;
    2469             :         int index2;
    2470             :         int mark;
    2471             :         int ret;
    2472        1483 :         struct btrfs_root *log = root->log_root;
    2473        1483 :         struct btrfs_root *log_root_tree = root->fs_info->log_root_tree;
    2474             :         int log_transid = 0;
    2475             :         struct btrfs_log_ctx root_log_ctx;
    2476             :         struct blk_plug plug;
    2477             : 
    2478        1483 :         mutex_lock(&root->log_mutex);
    2479        1483 :         log_transid = ctx->log_transid;
    2480        1483 :         if (root->log_transid_committed >= log_transid) {
    2481           0 :                 mutex_unlock(&root->log_mutex);
    2482           0 :                 return ctx->log_ret;
    2483             :         }
    2484             : 
    2485        1483 :         index1 = log_transid % 2;
    2486        2966 :         if (atomic_read(&root->log_commit[index1])) {
    2487           4 :                 wait_log_commit(trans, root, log_transid);
    2488           4 :                 mutex_unlock(&root->log_mutex);
    2489           4 :                 return ctx->log_ret;
    2490             :         }
    2491             :         ASSERT(log_transid == root->log_transid);
    2492             :         atomic_set(&root->log_commit[index1], 1);
    2493             : 
    2494             :         /* wait for previous tree log sync to complete */
    2495        2958 :         if (atomic_read(&root->log_commit[(index1 + 1) % 2]))
    2496          14 :                 wait_log_commit(trans, root, log_transid - 1);
    2497             : 
    2498             :         while (1) {
    2499             :                 int batch = atomic_read(&root->log_batch);
    2500             :                 /* when we're on an ssd, just kick the log commit out */
    2501        2960 :                 if (!btrfs_test_opt(root, SSD) &&
    2502             :                     test_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state)) {
    2503           6 :                         mutex_unlock(&root->log_mutex);
    2504           6 :                         schedule_timeout_uninterruptible(1);
    2505           6 :                         mutex_lock(&root->log_mutex);
    2506             :                 }
    2507        1480 :                 wait_for_writer(trans, root);
    2508        1480 :                 if (batch == atomic_read(&root->log_batch))
    2509             :                         break;
    2510             :         }
    2511             : 
    2512             :         /* bail out if we need to do a full commit */
    2513        2958 :         if (btrfs_need_log_full_commit(root->fs_info, trans)) {
    2514             :                 ret = -EAGAIN;
    2515           2 :                 btrfs_free_logged_extents(log, log_transid);
    2516           2 :                 mutex_unlock(&root->log_mutex);
    2517           2 :                 goto out;
    2518             :         }
    2519             : 
    2520        1477 :         if (log_transid % 2 == 0)
    2521             :                 mark = EXTENT_DIRTY;
    2522             :         else
    2523             :                 mark = EXTENT_NEW;
    2524             : 
    2525             :         /* we start IO on  all the marked extents here, but we don't actually
    2526             :          * wait for them until later.
    2527             :          */
    2528        1477 :         blk_start_plug(&plug);
    2529        1477 :         ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark);
    2530        1477 :         if (ret) {
    2531           0 :                 blk_finish_plug(&plug);
    2532           0 :                 btrfs_abort_transaction(trans, root, ret);
    2533           0 :                 btrfs_free_logged_extents(log, log_transid);
    2534           0 :                 btrfs_set_log_full_commit(root->fs_info, trans);
    2535           0 :                 mutex_unlock(&root->log_mutex);
    2536           0 :                 goto out;
    2537             :         }
    2538             : 
    2539        1477 :         btrfs_set_root_node(&log->root_item, log->node);
    2540             : 
    2541        1477 :         root->log_transid++;
    2542        1477 :         log->log_transid = root->log_transid;
    2543        1477 :         root->log_start_pid = 0;
    2544             :         /*
    2545             :          * IO has been started, blocks of the log tree have WRITTEN flag set
    2546             :          * in their headers. new modifications of the log will be written to
    2547             :          * new positions. so it's safe to allow log writers to go in.
    2548             :          */
    2549        1477 :         mutex_unlock(&root->log_mutex);
    2550             : 
    2551             :         btrfs_init_log_ctx(&root_log_ctx);
    2552             : 
    2553        1477 :         mutex_lock(&log_root_tree->log_mutex);
    2554        1477 :         atomic_inc(&log_root_tree->log_batch);
    2555        1477 :         atomic_inc(&log_root_tree->log_writers);
    2556             : 
    2557        1477 :         index2 = log_root_tree->log_transid % 2;
    2558        1477 :         list_add_tail(&root_log_ctx.list, &log_root_tree->log_ctxs[index2]);
    2559        1477 :         root_log_ctx.log_transid = log_root_tree->log_transid;
    2560             : 
    2561        1477 :         mutex_unlock(&log_root_tree->log_mutex);
    2562             : 
    2563        1477 :         ret = update_log_root(trans, log);
    2564             : 
    2565        1477 :         mutex_lock(&log_root_tree->log_mutex);
    2566        1477 :         if (atomic_dec_and_test(&log_root_tree->log_writers)) {
    2567        1477 :                 smp_mb();
    2568        1477 :                 if (waitqueue_active(&log_root_tree->log_writer_wait))
    2569           0 :                         wake_up(&log_root_tree->log_writer_wait);
    2570             :         }
    2571             : 
    2572        1477 :         if (ret) {
    2573           0 :                 if (!list_empty(&root_log_ctx.list))
    2574             :                         list_del_init(&root_log_ctx.list);
    2575             : 
    2576           0 :                 blk_finish_plug(&plug);
    2577           0 :                 btrfs_set_log_full_commit(root->fs_info, trans);
    2578             : 
    2579           0 :                 if (ret != -ENOSPC) {
    2580           0 :                         btrfs_abort_transaction(trans, root, ret);
    2581           0 :                         mutex_unlock(&log_root_tree->log_mutex);
    2582           0 :                         goto out;
    2583             :                 }
    2584           0 :                 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
    2585           0 :                 btrfs_free_logged_extents(log, log_transid);
    2586           0 :                 mutex_unlock(&log_root_tree->log_mutex);
    2587             :                 ret = -EAGAIN;
    2588           0 :                 goto out;
    2589             :         }
    2590             : 
    2591        1477 :         if (log_root_tree->log_transid_committed >= root_log_ctx.log_transid) {
    2592           0 :                 mutex_unlock(&log_root_tree->log_mutex);
    2593           0 :                 ret = root_log_ctx.log_ret;
    2594           0 :                 goto out;
    2595             :         }
    2596             : 
    2597        1477 :         index2 = root_log_ctx.log_transid % 2;
    2598        2954 :         if (atomic_read(&log_root_tree->log_commit[index2])) {
    2599           0 :                 blk_finish_plug(&plug);
    2600           0 :                 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
    2601           0 :                 wait_log_commit(trans, log_root_tree,
    2602             :                                 root_log_ctx.log_transid);
    2603           0 :                 btrfs_free_logged_extents(log, log_transid);
    2604           0 :                 mutex_unlock(&log_root_tree->log_mutex);
    2605           0 :                 ret = root_log_ctx.log_ret;
    2606           0 :                 goto out;
    2607             :         }
    2608             :         ASSERT(root_log_ctx.log_transid == log_root_tree->log_transid);
    2609             :         atomic_set(&log_root_tree->log_commit[index2], 1);
    2610             : 
    2611        2954 :         if (atomic_read(&log_root_tree->log_commit[(index2 + 1) % 2])) {
    2612           0 :                 wait_log_commit(trans, log_root_tree,
    2613           0 :                                 root_log_ctx.log_transid - 1);
    2614             :         }
    2615             : 
    2616        1477 :         wait_for_writer(trans, log_root_tree);
    2617             : 
    2618             :         /*
    2619             :          * now that we've moved on to the tree of log tree roots,
    2620             :          * check the full commit flag again
    2621             :          */
    2622        2954 :         if (btrfs_need_log_full_commit(root->fs_info, trans)) {
    2623           0 :                 blk_finish_plug(&plug);
    2624           0 :                 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
    2625           0 :                 btrfs_free_logged_extents(log, log_transid);
    2626           0 :                 mutex_unlock(&log_root_tree->log_mutex);
    2627             :                 ret = -EAGAIN;
    2628           0 :                 goto out_wake_log_root;
    2629             :         }
    2630             : 
    2631        1477 :         ret = btrfs_write_marked_extents(log_root_tree,
    2632             :                                          &log_root_tree->dirty_log_pages,
    2633             :                                          EXTENT_DIRTY | EXTENT_NEW);
    2634        1477 :         blk_finish_plug(&plug);
    2635        1477 :         if (ret) {
    2636           0 :                 btrfs_set_log_full_commit(root->fs_info, trans);
    2637           0 :                 btrfs_abort_transaction(trans, root, ret);
    2638           0 :                 btrfs_free_logged_extents(log, log_transid);
    2639           0 :                 mutex_unlock(&log_root_tree->log_mutex);
    2640           0 :                 goto out_wake_log_root;
    2641             :         }
    2642        1477 :         btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
    2643        1477 :         btrfs_wait_marked_extents(log_root_tree,
    2644             :                                   &log_root_tree->dirty_log_pages,
    2645             :                                   EXTENT_NEW | EXTENT_DIRTY);
    2646        1477 :         btrfs_wait_logged_extents(log, log_transid);
    2647             : 
    2648        1477 :         btrfs_set_super_log_root(root->fs_info->super_for_commit,
    2649        2954 :                                 log_root_tree->node->start);
    2650        1477 :         btrfs_set_super_log_root_level(root->fs_info->super_for_commit,
    2651        1477 :                                 btrfs_header_level(log_root_tree->node));
    2652             : 
    2653        1477 :         log_root_tree->log_transid++;
    2654        1477 :         mutex_unlock(&log_root_tree->log_mutex);
    2655             : 
    2656             :         /*
    2657             :          * nobody else is going to jump in and write the ctree
    2658             :          * super here because the log_commit atomic below is protecting
    2659             :          * us.  We must be called with a transaction handle pinning
    2660             :          * the running transaction open, so a full commit can't hop
    2661             :          * in and cause problems either.
    2662             :          */
    2663        1477 :         ret = write_ctree_super(trans, root->fs_info->tree_root, 1);
    2664        1477 :         if (ret) {
    2665           0 :                 btrfs_set_log_full_commit(root->fs_info, trans);
    2666           0 :                 btrfs_abort_transaction(trans, root, ret);
    2667           0 :                 goto out_wake_log_root;
    2668             :         }
    2669             : 
    2670        1477 :         mutex_lock(&root->log_mutex);
    2671        1477 :         if (root->last_log_commit < log_transid)
    2672        1254 :                 root->last_log_commit = log_transid;
    2673        1477 :         mutex_unlock(&root->log_mutex);
    2674             : 
    2675             : out_wake_log_root:
    2676             :         /*
    2677             :          * We needn't get log_mutex here because we are sure all
    2678             :          * the other tasks are blocked.
    2679             :          */
    2680             :         btrfs_remove_all_log_ctxs(log_root_tree, index2, ret);
    2681             : 
    2682        1477 :         mutex_lock(&log_root_tree->log_mutex);
    2683        1477 :         log_root_tree->log_transid_committed++;
    2684             :         atomic_set(&log_root_tree->log_commit[index2], 0);
    2685        1477 :         mutex_unlock(&log_root_tree->log_mutex);
    2686             : 
    2687        2954 :         if (waitqueue_active(&log_root_tree->log_commit_wait[index2]))
    2688           0 :                 wake_up(&log_root_tree->log_commit_wait[index2]);
    2689             : out:
    2690             :         /* See above. */
    2691             :         btrfs_remove_all_log_ctxs(root, index1, ret);
    2692             : 
    2693        1479 :         mutex_lock(&root->log_mutex);
    2694        1479 :         root->log_transid_committed++;
    2695             :         atomic_set(&root->log_commit[index1], 0);
    2696        1479 :         mutex_unlock(&root->log_mutex);
    2697             : 
    2698        2958 :         if (waitqueue_active(&root->log_commit_wait[index1]))
    2699          18 :                 wake_up(&root->log_commit_wait[index1]);
    2700        1479 :         return ret;
    2701             : }
    2702             : 
    2703         448 : static void free_log_tree(struct btrfs_trans_handle *trans,
    2704             :                           struct btrfs_root *log)
    2705             : {
    2706             :         int ret;
    2707             :         u64 start;
    2708             :         u64 end;
    2709         448 :         struct walk_control wc = {
    2710             :                 .free = 1,
    2711             :                 .process_func = process_one_buffer
    2712             :         };
    2713             : 
    2714         448 :         ret = walk_log_tree(trans, log, &wc);
    2715             :         /* I don't think this can happen but just in case */
    2716         448 :         if (ret)
    2717           0 :                 btrfs_abort_transaction(trans, log, ret);
    2718             : 
    2719             :         while (1) {
    2720         499 :                 ret = find_first_extent_bit(&log->dirty_log_pages,
    2721             :                                 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW,
    2722             :                                 NULL);
    2723         499 :                 if (ret)
    2724             :                         break;
    2725             : 
    2726          51 :                 clear_extent_bits(&log->dirty_log_pages, start, end,
    2727             :                                   EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS);
    2728          51 :         }
    2729             : 
    2730             :         /*
    2731             :          * We may have short-circuited the log tree with the full commit logic
    2732             :          * and left ordered extents on our list, so clear these out to keep us
    2733             :          * from leaking inodes and memory.
    2734             :          */
    2735         448 :         btrfs_free_logged_extents(log, 0);
    2736         448 :         btrfs_free_logged_extents(log, 1);
    2737             : 
    2738         448 :         free_extent_buffer(log->node);
    2739         448 :         kfree(log);
    2740         448 : }
    2741             : 
    2742             : /*
    2743             :  * free all the extents used by the tree log.  This should be called
    2744             :  * at commit time of the full transaction
    2745             :  */
    2746        2548 : int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root)
    2747             : {
    2748        2548 :         if (root->log_root) {
    2749         224 :                 free_log_tree(trans, root->log_root);
    2750         224 :                 root->log_root = NULL;
    2751             :         }
    2752        2548 :         return 0;
    2753             : }
    2754             : 
    2755        2098 : int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
    2756             :                              struct btrfs_fs_info *fs_info)
    2757             : {
    2758        2098 :         if (fs_info->log_root_tree) {
    2759         224 :                 free_log_tree(trans, fs_info->log_root_tree);
    2760         224 :                 fs_info->log_root_tree = NULL;
    2761             :         }
    2762        2098 :         return 0;
    2763             : }
    2764             : 
    2765             : /*
    2766             :  * If both a file and directory are logged, and unlinks or renames are
    2767             :  * mixed in, we have a few interesting corners:
    2768             :  *
    2769             :  * create file X in dir Y
    2770             :  * link file X to X.link in dir Y
    2771             :  * fsync file X
    2772             :  * unlink file X but leave X.link
    2773             :  * fsync dir Y
    2774             :  *
    2775             :  * After a crash we would expect only X.link to exist.  But file X
    2776             :  * didn't get fsync'd again so the log has back refs for X and X.link.
    2777             :  *
    2778             :  * We solve this by removing directory entries and inode backrefs from the
    2779             :  * log when a file that was logged in the current transaction is
    2780             :  * unlinked.  Any later fsync will include the updated log entries, and
    2781             :  * we'll be able to reconstruct the proper directory items from backrefs.
    2782             :  *
    2783             :  * This optimization allows us to avoid relogging the entire inode
    2784             :  * or the entire directory.
    2785             :  */
    2786       12289 : int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
    2787             :                                  struct btrfs_root *root,
    2788             :                                  const char *name, int name_len,
    2789             :                                  struct inode *dir, u64 index)
    2790             : {
    2791             :         struct btrfs_root *log;
    2792             :         struct btrfs_dir_item *di;
    2793             :         struct btrfs_path *path;
    2794             :         int ret;
    2795             :         int err = 0;
    2796             :         int bytes_del = 0;
    2797             :         u64 dir_ino = btrfs_ino(dir);
    2798             : 
    2799       12289 :         if (BTRFS_I(dir)->logged_trans < trans->transid)
    2800             :                 return 0;
    2801             : 
    2802         151 :         ret = join_running_log_trans(root);
    2803         151 :         if (ret)
    2804             :                 return 0;
    2805             : 
    2806         151 :         mutex_lock(&BTRFS_I(dir)->log_mutex);
    2807             : 
    2808         151 :         log = root->log_root;
    2809         151 :         path = btrfs_alloc_path();
    2810         151 :         if (!path) {
    2811             :                 err = -ENOMEM;
    2812             :                 goto out_unlock;
    2813             :         }
    2814             : 
    2815         151 :         di = btrfs_lookup_dir_item(trans, log, path, dir_ino,
    2816             :                                    name, name_len, -1);
    2817         151 :         if (IS_ERR(di)) {
    2818           0 :                 err = PTR_ERR(di);
    2819           0 :                 goto fail;
    2820             :         }
    2821         151 :         if (di) {
    2822           0 :                 ret = btrfs_delete_one_dir_name(trans, log, path, di);
    2823             :                 bytes_del += name_len;
    2824           0 :                 if (ret) {
    2825             :                         err = ret;
    2826             :                         goto fail;
    2827             :                 }
    2828             :         }
    2829         151 :         btrfs_release_path(path);
    2830         151 :         di = btrfs_lookup_dir_index_item(trans, log, path, dir_ino,
    2831             :                                          index, name, name_len, -1);
    2832         151 :         if (IS_ERR(di)) {
    2833         151 :                 err = PTR_ERR(di);
    2834         151 :                 goto fail;
    2835             :         }
    2836           0 :         if (di) {
    2837           0 :                 ret = btrfs_delete_one_dir_name(trans, log, path, di);
    2838           0 :                 bytes_del += name_len;
    2839           0 :                 if (ret) {
    2840             :                         err = ret;
    2841             :                         goto fail;
    2842             :                 }
    2843             :         }
    2844             : 
    2845             :         /* update the directory size in the log to reflect the names
    2846             :          * we have removed
    2847             :          */
    2848           0 :         if (bytes_del) {
    2849             :                 struct btrfs_key key;
    2850             : 
    2851           0 :                 key.objectid = dir_ino;
    2852           0 :                 key.offset = 0;
    2853           0 :                 key.type = BTRFS_INODE_ITEM_KEY;
    2854           0 :                 btrfs_release_path(path);
    2855             : 
    2856           0 :                 ret = btrfs_search_slot(trans, log, &key, path, 0, 1);
    2857           0 :                 if (ret < 0) {
    2858             :                         err = ret;
    2859           0 :                         goto fail;
    2860             :                 }
    2861           0 :                 if (ret == 0) {
    2862             :                         struct btrfs_inode_item *item;
    2863             :                         u64 i_size;
    2864             : 
    2865           0 :                         item = btrfs_item_ptr(path->nodes[0], path->slots[0],
    2866             :                                               struct btrfs_inode_item);
    2867           0 :                         i_size = btrfs_inode_size(path->nodes[0], item);
    2868           0 :                         if (i_size > bytes_del)
    2869           0 :                                 i_size -= bytes_del;
    2870             :                         else
    2871             :                                 i_size = 0;
    2872           0 :                         btrfs_set_inode_size(path->nodes[0], item, i_size);
    2873           0 :                         btrfs_mark_buffer_dirty(path->nodes[0]);
    2874             :                 } else
    2875             :                         ret = 0;
    2876           0 :                 btrfs_release_path(path);
    2877             :         }
    2878             : fail:
    2879         151 :         btrfs_free_path(path);
    2880             : out_unlock:
    2881         151 :         mutex_unlock(&BTRFS_I(dir)->log_mutex);
    2882         151 :         if (ret == -ENOSPC) {
    2883           0 :                 btrfs_set_log_full_commit(root->fs_info, trans);
    2884             :                 ret = 0;
    2885         151 :         } else if (ret < 0)
    2886           0 :                 btrfs_abort_transaction(trans, root, ret);
    2887             : 
    2888         151 :         btrfs_end_log_trans(root);
    2889             : 
    2890         151 :         return err;
    2891             : }
    2892             : 
    2893             : /* see comments for btrfs_del_dir_entries_in_log */
    2894       12289 : int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
    2895             :                                struct btrfs_root *root,
    2896             :                                const char *name, int name_len,
    2897             :                                struct inode *inode, u64 dirid)
    2898             : {
    2899             :         struct btrfs_root *log;
    2900             :         u64 index;
    2901             :         int ret;
    2902             : 
    2903       12289 :         if (BTRFS_I(inode)->logged_trans < trans->transid)
    2904             :                 return 0;
    2905             : 
    2906          49 :         ret = join_running_log_trans(root);
    2907          49 :         if (ret)
    2908             :                 return 0;
    2909          49 :         log = root->log_root;
    2910          49 :         mutex_lock(&BTRFS_I(inode)->log_mutex);
    2911             : 
    2912          49 :         ret = btrfs_del_inode_ref(trans, log, name, name_len, btrfs_ino(inode),
    2913             :                                   dirid, &index);
    2914          49 :         mutex_unlock(&BTRFS_I(inode)->log_mutex);
    2915          49 :         if (ret == -ENOSPC) {
    2916           0 :                 btrfs_set_log_full_commit(root->fs_info, trans);
    2917             :                 ret = 0;
    2918          49 :         } else if (ret < 0 && ret != -ENOENT)
    2919           0 :                 btrfs_abort_transaction(trans, root, ret);
    2920          49 :         btrfs_end_log_trans(root);
    2921             : 
    2922          49 :         return ret;
    2923             : }
    2924             : 
    2925             : /*
    2926             :  * creates a range item in the log for 'dirid'.  first_offset and
    2927             :  * last_offset tell us which parts of the key space the log should
    2928             :  * be considered authoritative for.
    2929             :  */
    2930           0 : static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans,
    2931             :                                        struct btrfs_root *log,
    2932             :                                        struct btrfs_path *path,
    2933             :                                        int key_type, u64 dirid,
    2934             :                                        u64 first_offset, u64 last_offset)
    2935             : {
    2936             :         int ret;
    2937             :         struct btrfs_key key;
    2938             :         struct btrfs_dir_log_item *item;
    2939             : 
    2940           0 :         key.objectid = dirid;
    2941           0 :         key.offset = first_offset;
    2942           0 :         if (key_type == BTRFS_DIR_ITEM_KEY)
    2943           0 :                 key.type = BTRFS_DIR_LOG_ITEM_KEY;
    2944             :         else
    2945           0 :                 key.type = BTRFS_DIR_LOG_INDEX_KEY;
    2946             :         ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*item));
    2947           0 :         if (ret)
    2948             :                 return ret;
    2949             : 
    2950           0 :         item = btrfs_item_ptr(path->nodes[0], path->slots[0],
    2951             :                               struct btrfs_dir_log_item);
    2952           0 :         btrfs_set_dir_log_end(path->nodes[0], item, last_offset);
    2953           0 :         btrfs_mark_buffer_dirty(path->nodes[0]);
    2954           0 :         btrfs_release_path(path);
    2955           0 :         return 0;
    2956             : }
    2957             : 
    2958             : /*
    2959             :  * log all the items included in the current transaction for a given
    2960             :  * directory.  This also creates the range items in the log tree required
    2961             :  * to replay anything deleted before the fsync
    2962             :  */
    2963           0 : static noinline int log_dir_items(struct btrfs_trans_handle *trans,
    2964             :                           struct btrfs_root *root, struct inode *inode,
    2965             :                           struct btrfs_path *path,
    2966             :                           struct btrfs_path *dst_path, int key_type,
    2967             :                           u64 min_offset, u64 *last_offset_ret)
    2968             : {
    2969             :         struct btrfs_key min_key;
    2970             :         struct btrfs_root *log = root->log_root;
    2971           0 :         struct extent_buffer *src;
    2972             :         int err = 0;
    2973             :         int ret;
    2974             :         int i;
    2975             :         int nritems;
    2976             :         u64 first_offset = min_offset;
    2977             :         u64 last_offset = (u64)-1;
    2978             :         u64 ino = btrfs_ino(inode);
    2979             : 
    2980           0 :         log = root->log_root;
    2981             : 
    2982           0 :         min_key.objectid = ino;
    2983           0 :         min_key.type = key_type;
    2984           0 :         min_key.offset = min_offset;
    2985             : 
    2986           0 :         path->keep_locks = 1;
    2987             : 
    2988           0 :         ret = btrfs_search_forward(root, &min_key, path, trans->transid);
    2989             : 
    2990             :         /*
    2991             :          * we didn't find anything from this transaction, see if there
    2992             :          * is anything at all
    2993             :          */
    2994           0 :         if (ret != 0 || min_key.objectid != ino || min_key.type != key_type) {
    2995           0 :                 min_key.objectid = ino;
    2996           0 :                 min_key.type = key_type;
    2997           0 :                 min_key.offset = (u64)-1;
    2998           0 :                 btrfs_release_path(path);
    2999           0 :                 ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0);
    3000           0 :                 if (ret < 0) {
    3001           0 :                         btrfs_release_path(path);
    3002           0 :                         return ret;
    3003             :                 }
    3004           0 :                 ret = btrfs_previous_item(root, path, ino, key_type);
    3005             : 
    3006             :                 /* if ret == 0 there are items for this type,
    3007             :                  * create a range to tell us the last key of this type.
    3008             :                  * otherwise, there are no items in this directory after
    3009             :                  * min_offset, and we create a range to indicate that.
    3010             :                  */
    3011           0 :                 if (ret == 0) {
    3012             :                         struct btrfs_key tmp;
    3013           0 :                         btrfs_item_key_to_cpu(path->nodes[0], &tmp,
    3014             :                                               path->slots[0]);
    3015           0 :                         if (key_type == tmp.type)
    3016           0 :                                 first_offset = max(min_offset, tmp.offset) + 1;
    3017             :                 }
    3018             :                 goto done;
    3019             :         }
    3020             : 
    3021             :         /* go backward to find any previous key */
    3022           0 :         ret = btrfs_previous_item(root, path, ino, key_type);
    3023           0 :         if (ret == 0) {
    3024             :                 struct btrfs_key tmp;
    3025           0 :                 btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]);
    3026           0 :                 if (key_type == tmp.type) {
    3027           0 :                         first_offset = tmp.offset;
    3028           0 :                         ret = overwrite_item(trans, log, dst_path,
    3029             :                                              path->nodes[0], path->slots[0],
    3030             :                                              &tmp);
    3031           0 :                         if (ret) {
    3032             :                                 err = ret;
    3033           0 :                                 goto done;
    3034             :                         }
    3035             :                 }
    3036             :         }
    3037           0 :         btrfs_release_path(path);
    3038             : 
    3039             :         /* find the first key from this transaction again */
    3040           0 :         ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0);
    3041           0 :         if (WARN_ON(ret != 0))
    3042             :                 goto done;
    3043             : 
    3044             :         /*
    3045             :          * we have a block from this transaction, log every item in it
    3046             :          * from our directory
    3047             :          */
    3048             :         while (1) {
    3049             :                 struct btrfs_key tmp;
    3050           0 :                 src = path->nodes[0];
    3051           0 :                 nritems = btrfs_header_nritems(src);
    3052           0 :                 for (i = path->slots[0]; i < nritems; i++) {
    3053           0 :                         btrfs_item_key_to_cpu(src, &min_key, i);
    3054             : 
    3055           0 :                         if (min_key.objectid != ino || min_key.type != key_type)
    3056             :                                 goto done;
    3057           0 :                         ret = overwrite_item(trans, log, dst_path, src, i,
    3058             :                                              &min_key);
    3059           0 :                         if (ret) {
    3060             :                                 err = ret;
    3061             :                                 goto done;
    3062             :                         }
    3063             :                 }
    3064           0 :                 path->slots[0] = nritems;
    3065             : 
    3066             :                 /*
    3067             :                  * look ahead to the next item and see if it is also
    3068             :                  * from this directory and from this transaction
    3069             :                  */
    3070           0 :                 ret = btrfs_next_leaf(root, path);
    3071           0 :                 if (ret == 1) {
    3072             :                         last_offset = (u64)-1;
    3073             :                         goto done;
    3074             :                 }
    3075           0 :                 btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]);
    3076           0 :                 if (tmp.objectid != ino || tmp.type != key_type) {
    3077             :                         last_offset = (u64)-1;
    3078             :                         goto done;
    3079             :                 }
    3080           0 :                 if (btrfs_header_generation(path->nodes[0]) != trans->transid) {
    3081           0 :                         ret = overwrite_item(trans, log, dst_path,
    3082             :                                              path->nodes[0], path->slots[0],
    3083             :                                              &tmp);
    3084           0 :                         if (ret)
    3085             :                                 err = ret;
    3086             :                         else
    3087           0 :                                 last_offset = tmp.offset;
    3088             :                         goto done;
    3089             :                 }
    3090           0 :         }
    3091             : done:
    3092           0 :         btrfs_release_path(path);
    3093           0 :         btrfs_release_path(dst_path);
    3094             : 
    3095           0 :         if (err == 0) {
    3096           0 :                 *last_offset_ret = last_offset;
    3097             :                 /*
    3098             :                  * insert the log range keys to indicate where the log
    3099             :                  * is valid
    3100             :                  */
    3101           0 :                 ret = insert_dir_log_key(trans, log, path, key_type,
    3102             :                                          ino, first_offset, last_offset);
    3103           0 :                 if (ret)
    3104             :                         err = ret;
    3105             :         }
    3106           0 :         return err;
    3107             : }
    3108             : 
    3109             : /*
    3110             :  * logging directories is very similar to logging inodes: we find all the items
    3111             :  * from the current transaction and write them to the log.
    3112             :  *
    3113             :  * The recovery code scans the directory in the subvolume, and if it finds a
    3114             :  * key in the range logged that is not present in the log tree, then it means
    3115             :  * that dir entry was unlinked during the transaction.
    3116             :  *
    3117             :  * In order for that scan to work, we must include one key smaller than
    3118             :  * the smallest logged by this transaction and one key larger than the largest
    3119             :  * key logged by this transaction.
    3120             :  */
    3121           0 : static noinline int log_directory_changes(struct btrfs_trans_handle *trans,
    3122             :                           struct btrfs_root *root, struct inode *inode,
    3123             :                           struct btrfs_path *path,
    3124             :                           struct btrfs_path *dst_path)
    3125             : {
    3126             :         u64 min_key;
    3127             :         u64 max_key;
    3128             :         int ret;
    3129             :         int key_type = BTRFS_DIR_ITEM_KEY;
    3130             : 
    3131             : again:
    3132             :         min_key = 0;
    3133           0 :         max_key = 0;
    3134             :         while (1) {
    3135           0 :                 ret = log_dir_items(trans, root, inode, path,
    3136             :                                     dst_path, key_type, min_key,
    3137             :                                     &max_key);
    3138           0 :                 if (ret)
    3139             :                         return ret;
    3140           0 :                 if (max_key == (u64)-1)
    3141             :                         break;
    3142           0 :                 min_key = max_key + 1;
    3143           0 :         }
    3144             : 
    3145           0 :         if (key_type == BTRFS_DIR_ITEM_KEY) {
    3146             :                 key_type = BTRFS_DIR_INDEX_KEY;
    3147             :                 goto again;
    3148             :         }
    3149             :         return 0;
    3150             : }
    3151             : 
    3152             : /*
    3153             :  * a helper function to drop items from the log before we relog an
    3154             :  * inode.  max_key_type indicates the highest item type to remove.
    3155             :  * This cannot be run for file data extents because it does not
    3156             :  * free the extents they point to.
    3157             :  */
    3158         313 : static int drop_objectid_items(struct btrfs_trans_handle *trans,
    3159             :                                   struct btrfs_root *log,
    3160             :                                   struct btrfs_path *path,
    3161             :                                   u64 objectid, int max_key_type)
    3162             : {
    3163             :         int ret;
    3164             :         struct btrfs_key key;
    3165             :         struct btrfs_key found_key;
    3166             :         int start_slot;
    3167             : 
    3168         313 :         key.objectid = objectid;
    3169         313 :         key.type = max_key_type;
    3170         313 :         key.offset = (u64)-1;
    3171             : 
    3172             :         while (1) {
    3173         456 :                 ret = btrfs_search_slot(trans, log, &key, path, -1, 1);
    3174         456 :                 BUG_ON(ret == 0); /* Logic error */
    3175         456 :                 if (ret < 0)
    3176             :                         break;
    3177             : 
    3178         456 :                 if (path->slots[0] == 0)
    3179             :                         break;
    3180             : 
    3181         266 :                 path->slots[0]--;
    3182         266 :                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
    3183             :                                       path->slots[0]);
    3184             : 
    3185         266 :                 if (found_key.objectid != objectid)
    3186             :                         break;
    3187             : 
    3188         215 :                 found_key.offset = 0;
    3189         215 :                 found_key.type = 0;
    3190         215 :                 ret = btrfs_bin_search(path->nodes[0], &found_key, 0,
    3191             :                                        &start_slot);
    3192             : 
    3193         215 :                 ret = btrfs_del_items(trans, log, path, start_slot,
    3194         215 :                                       path->slots[0] - start_slot + 1);
    3195             :                 /*
    3196             :                  * If start slot isn't 0 then we don't need to re-search, we've
    3197             :                  * found the last guy with the objectid in this tree.
    3198             :                  */
    3199         215 :                 if (ret || start_slot != 0)
    3200             :                         break;
    3201         143 :                 btrfs_release_path(path);
    3202         143 :         }
    3203         313 :         btrfs_release_path(path);
    3204         313 :         if (ret > 0)
    3205             :                 ret = 0;
    3206         313 :         return ret;
    3207             : }
    3208             : 
    3209        1808 : static void fill_inode_item(struct btrfs_trans_handle *trans,
    3210             :                             struct extent_buffer *leaf,
    3211             :                             struct btrfs_inode_item *item,
    3212             :                             struct inode *inode, int log_inode_only)
    3213             : {
    3214             :         struct btrfs_map_token token;
    3215             : 
    3216             :         btrfs_init_map_token(&token);
    3217             : 
    3218        1808 :         if (log_inode_only) {
    3219             :                 /* set the generation to zero so the recover code
    3220             :                  * can tell the difference between an logging
    3221             :                  * just to say 'this inode exists' and a logging
    3222             :                  * to say 'update this inode with these values'
    3223             :                  */
    3224             :                 btrfs_set_token_inode_generation(leaf, item, 0, &token);
    3225             :                 btrfs_set_token_inode_size(leaf, item, 0, &token);
    3226             :         } else {
    3227        1468 :                 btrfs_set_token_inode_generation(leaf, item,
    3228             :                                                  BTRFS_I(inode)->generation,
    3229             :                                                  &token);
    3230        1468 :                 btrfs_set_token_inode_size(leaf, item, inode->i_size, &token);
    3231             :         }
    3232             : 
    3233             :         btrfs_set_token_inode_uid(leaf, item, i_uid_read(inode), &token);
    3234             :         btrfs_set_token_inode_gid(leaf, item, i_gid_read(inode), &token);
    3235        1808 :         btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token);
    3236        1808 :         btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token);
    3237             : 
    3238        1808 :         btrfs_set_token_timespec_sec(leaf, btrfs_inode_atime(item),
    3239        1808 :                                      inode->i_atime.tv_sec, &token);
    3240        1808 :         btrfs_set_token_timespec_nsec(leaf, btrfs_inode_atime(item),
    3241        1808 :                                       inode->i_atime.tv_nsec, &token);
    3242             : 
    3243        1808 :         btrfs_set_token_timespec_sec(leaf, btrfs_inode_mtime(item),
    3244        1808 :                                      inode->i_mtime.tv_sec, &token);
    3245        1808 :         btrfs_set_token_timespec_nsec(leaf, btrfs_inode_mtime(item),
    3246        1808 :                                       inode->i_mtime.tv_nsec, &token);
    3247             : 
    3248        1808 :         btrfs_set_token_timespec_sec(leaf, btrfs_inode_ctime(item),
    3249        1808 :                                      inode->i_ctime.tv_sec, &token);
    3250        1808 :         btrfs_set_token_timespec_nsec(leaf, btrfs_inode_ctime(item),
    3251        1808 :                                       inode->i_ctime.tv_nsec, &token);
    3252             : 
    3253        1808 :         btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode),
    3254             :                                      &token);
    3255             : 
    3256        1808 :         btrfs_set_token_inode_sequence(leaf, item, inode->i_version, &token);
    3257        1808 :         btrfs_set_token_inode_transid(leaf, item, trans->transid, &token);
    3258        1808 :         btrfs_set_token_inode_rdev(leaf, item, inode->i_rdev, &token);
    3259        1808 :         btrfs_set_token_inode_flags(leaf, item, BTRFS_I(inode)->flags, &token);
    3260             :         btrfs_set_token_inode_block_group(leaf, item, 0, &token);
    3261        1808 : }
    3262             : 
    3263        1183 : static int log_inode_item(struct btrfs_trans_handle *trans,
    3264             :                           struct btrfs_root *log, struct btrfs_path *path,
    3265             :                           struct inode *inode)
    3266             : {
    3267             :         struct btrfs_inode_item *inode_item;
    3268             :         int ret;
    3269             : 
    3270        1183 :         ret = btrfs_insert_empty_item(trans, log, path,
    3271             :                                       &BTRFS_I(inode)->location,
    3272             :                                       sizeof(*inode_item));
    3273        1183 :         if (ret && ret != -EEXIST)
    3274             :                 return ret;
    3275        2366 :         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
    3276             :                                     struct btrfs_inode_item);
    3277        1183 :         fill_inode_item(trans, path->nodes[0], inode_item, inode, 0);
    3278        1183 :         btrfs_release_path(path);
    3279        1183 :         return 0;
    3280             : }
    3281             : 
    3282         710 : static noinline int copy_items(struct btrfs_trans_handle *trans,
    3283             :                                struct inode *inode,
    3284             :                                struct btrfs_path *dst_path,
    3285             :                                struct btrfs_path *src_path, u64 *last_extent,
    3286             :                                int start_slot, int nr, int inode_only)
    3287             : {
    3288             :         unsigned long src_offset;
    3289             :         unsigned long dst_offset;
    3290         710 :         struct btrfs_root *log = BTRFS_I(inode)->root->log_root;
    3291             :         struct btrfs_file_extent_item *extent;
    3292             :         struct btrfs_inode_item *inode_item;
    3293         710 :         struct extent_buffer *src = src_path->nodes[0];
    3294             :         struct btrfs_key first_key, last_key, key;
    3295             :         int ret;
    3296             :         struct btrfs_key *ins_keys;
    3297             :         u32 *ins_sizes;
    3298             :         char *ins_data;
    3299             :         int i;
    3300             :         struct list_head ordered_sums;
    3301         710 :         int skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
    3302             :         bool has_extents = false;
    3303             :         bool need_find_last_extent = true;
    3304             :         bool done = false;
    3305             : 
    3306             :         INIT_LIST_HEAD(&ordered_sums);
    3307             : 
    3308         710 :         ins_data = kmalloc(nr * sizeof(struct btrfs_key) +
    3309             :                            nr * sizeof(u32), GFP_NOFS);
    3310         710 :         if (!ins_data)
    3311             :                 return -ENOMEM;
    3312             : 
    3313         710 :         first_key.objectid = (u64)-1;
    3314             : 
    3315             :         ins_sizes = (u32 *)ins_data;
    3316         710 :         ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32));
    3317             : 
    3318       13431 :         for (i = 0; i < nr; i++) {
    3319       25442 :                 ins_sizes[i] = btrfs_item_size_nr(src, i + start_slot);
    3320       25442 :                 btrfs_item_key_to_cpu(src, ins_keys + i, i + start_slot);
    3321             :         }
    3322         710 :         ret = btrfs_insert_empty_items(trans, log, dst_path,
    3323             :                                        ins_keys, ins_sizes, nr);
    3324         710 :         if (ret) {
    3325           0 :                 kfree(ins_data);
    3326           0 :                 return ret;
    3327             :         }
    3328             : 
    3329       12721 :         for (i = 0; i < nr; i++, dst_path->slots[0]++) {
    3330       25442 :                 dst_offset = btrfs_item_ptr_offset(dst_path->nodes[0],
    3331             :                                                    dst_path->slots[0]);
    3332             : 
    3333       25442 :                 src_offset = btrfs_item_ptr_offset(src, start_slot + i);
    3334             : 
    3335       12721 :                 if ((i == (nr - 1)))
    3336         710 :                         last_key = ins_keys[i];
    3337             : 
    3338       12721 :                 if (ins_keys[i].type == BTRFS_INODE_ITEM_KEY) {
    3339        1250 :                         inode_item = btrfs_item_ptr(dst_path->nodes[0],
    3340             :                                                     dst_path->slots[0],
    3341             :                                                     struct btrfs_inode_item);
    3342         625 :                         fill_inode_item(trans, dst_path->nodes[0], inode_item,
    3343             :                                         inode, inode_only == LOG_INODE_EXISTS);
    3344             :                 } else {
    3345       12096 :                         copy_extent_buffer(dst_path->nodes[0], src, dst_offset,
    3346       12096 :                                            src_offset, ins_sizes[i]);
    3347             :                 }
    3348             : 
    3349             :                 /*
    3350             :                  * We set need_find_last_extent here in case we know we were
    3351             :                  * processing other items and then walk into the first extent in
    3352             :                  * the inode.  If we don't hit an extent then nothing changes,
    3353             :                  * we'll do the last search the next time around.
    3354             :                  */
    3355       12721 :                 if (ins_keys[i].type == BTRFS_EXTENT_DATA_KEY) {
    3356             :                         has_extents = true;
    3357       11371 :                         if (first_key.objectid == (u64)-1)
    3358         285 :                                 first_key = ins_keys[i];
    3359             :                 } else {
    3360             :                         need_find_last_extent = false;
    3361             :                 }
    3362             : 
    3363             :                 /* take a reference on file data extents so that truncates
    3364             :                  * or deletes of this inode don't have to relog the inode
    3365             :                  * again
    3366             :                  */
    3367       12721 :                 if (btrfs_key_type(ins_keys + i) == BTRFS_EXTENT_DATA_KEY &&
    3368             :                     !skip_csum) {
    3369             :                         int found_type;
    3370       11371 :                         extent = btrfs_item_ptr(src, start_slot + i,
    3371             :                                                 struct btrfs_file_extent_item);
    3372             : 
    3373       11371 :                         if (btrfs_file_extent_generation(src, extent) < trans->transid)
    3374       10608 :                                 continue;
    3375             : 
    3376             :                         found_type = btrfs_file_extent_type(src, extent);
    3377         763 :                         if (found_type == BTRFS_FILE_EXTENT_REG) {
    3378             :                                 u64 ds, dl, cs, cl;
    3379             :                                 ds = btrfs_file_extent_disk_bytenr(src,
    3380             :                                                                 extent);
    3381             :                                 /* ds == 0 is a hole */
    3382         662 :                                 if (ds == 0)
    3383         164 :                                         continue;
    3384             : 
    3385             :                                 dl = btrfs_file_extent_disk_num_bytes(src,
    3386             :                                                                 extent);
    3387             :                                 cs = btrfs_file_extent_offset(src, extent);
    3388             :                                 cl = btrfs_file_extent_num_bytes(src,
    3389             :                                                                 extent);
    3390         498 :                                 if (btrfs_file_extent_compression(src,
    3391             :                                                                   extent)) {
    3392             :                                         cs = 0;
    3393             :                                         cl = dl;
    3394             :                                 }
    3395             : 
    3396         996 :                                 ret = btrfs_lookup_csums_range(
    3397         498 :                                                 log->fs_info->csum_root,
    3398         498 :                                                 ds + cs, ds + cs + cl - 1,
    3399             :                                                 &ordered_sums, 0);
    3400         498 :                                 if (ret) {
    3401           0 :                                         btrfs_release_path(dst_path);
    3402           0 :                                         kfree(ins_data);
    3403           0 :                                         return ret;
    3404             :                                 }
    3405             :                         }
    3406             :                 }
    3407             :         }
    3408             : 
    3409         710 :         btrfs_mark_buffer_dirty(dst_path->nodes[0]);
    3410         710 :         btrfs_release_path(dst_path);
    3411         710 :         kfree(ins_data);
    3412             : 
    3413             :         /*
    3414             :          * we have to do this after the loop above to avoid changing the
    3415             :          * log tree while trying to change the log tree.
    3416             :          */
    3417             :         ret = 0;
    3418        1921 :         while (!list_empty(&ordered_sums)) {
    3419         501 :                 struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next,
    3420             :                                                    struct btrfs_ordered_sum,
    3421             :                                                    list);
    3422         501 :                 if (!ret)
    3423         501 :                         ret = btrfs_csum_file_blocks(trans, log, sums);
    3424         501 :                 list_del(&sums->list);
    3425         501 :                 kfree(sums);
    3426             :         }
    3427             : 
    3428         710 :         if (!has_extents)
    3429             :                 return ret;
    3430             : 
    3431         285 :         if (need_find_last_extent && *last_extent == first_key.offset) {
    3432             :                 /*
    3433             :                  * We don't have any leafs between our current one and the one
    3434             :                  * we processed before that can have file extent items for our
    3435             :                  * inode (and have a generation number smaller than our current
    3436             :                  * transaction id).
    3437             :                  */
    3438             :                 need_find_last_extent = false;
    3439             :         }
    3440             : 
    3441             :         /*
    3442             :          * Because we use btrfs_search_forward we could skip leaves that were
    3443             :          * not modified and then assume *last_extent is valid when it really
    3444             :          * isn't.  So back up to the previous leaf and read the end of the last
    3445             :          * extent before we go and fill in holes.
    3446             :          */
    3447         285 :         if (need_find_last_extent) {
    3448             :                 u64 len;
    3449             : 
    3450          21 :                 ret = btrfs_prev_leaf(BTRFS_I(inode)->root, src_path);
    3451          21 :                 if (ret < 0)
    3452             :                         return ret;
    3453          21 :                 if (ret)
    3454             :                         goto fill_holes;
    3455          21 :                 if (src_path->slots[0])
    3456          21 :                         src_path->slots[0]--;
    3457          21 :                 src = src_path->nodes[0];
    3458          21 :                 btrfs_item_key_to_cpu(src, &key, src_path->slots[0]);
    3459          63 :                 if (key.objectid != btrfs_ino(inode) ||
    3460          21 :                     key.type != BTRFS_EXTENT_DATA_KEY)
    3461             :                         goto fill_holes;
    3462          42 :                 extent = btrfs_item_ptr(src, src_path->slots[0],
    3463             :                                         struct btrfs_file_extent_item);
    3464          21 :                 if (btrfs_file_extent_type(src, extent) ==
    3465             :                     BTRFS_FILE_EXTENT_INLINE) {
    3466           0 :                         len = btrfs_file_extent_inline_len(src,
    3467             :                                                            src_path->slots[0],
    3468             :                                                            extent);
    3469           0 :                         *last_extent = ALIGN(key.offset + len,
    3470             :                                              log->sectorsize);
    3471             :                 } else {
    3472             :                         len = btrfs_file_extent_num_bytes(src, extent);
    3473          21 :                         *last_extent = key.offset + len;
    3474             :                 }
    3475             :         }
    3476             : fill_holes:
    3477             :         /* So we did prev_leaf, now we need to move to the next leaf, but a few
    3478             :          * things could have happened
    3479             :          *
    3480             :          * 1) A merge could have happened, so we could currently be on a leaf
    3481             :          * that holds what we were copying in the first place.
    3482             :          * 2) A split could have happened, and now not all of the items we want
    3483             :          * are on the same leaf.
    3484             :          *
    3485             :          * So we need to adjust how we search for holes, we need to drop the
    3486             :          * path and re-search for the first extent key we found, and then walk
    3487             :          * forward until we hit the last one we copied.
    3488             :          */
    3489         285 :         if (need_find_last_extent) {
    3490             :                 /* btrfs_prev_leaf could return 1 without releasing the path */
    3491          21 :                 btrfs_release_path(src_path);
    3492          21 :                 ret = btrfs_search_slot(NULL, BTRFS_I(inode)->root, &first_key,
    3493             :                                         src_path, 0, 0);
    3494          21 :                 if (ret < 0)
    3495             :                         return ret;
    3496             :                 ASSERT(ret == 0);
    3497          21 :                 src = src_path->nodes[0];
    3498          21 :                 i = src_path->slots[0];
    3499             :         } else {
    3500             :                 i = start_slot;
    3501             :         }
    3502             : 
    3503             :         /*
    3504             :          * Ok so here we need to go through and fill in any holes we may have
    3505             :          * to make sure that holes are punched for those areas in case they had
    3506             :          * extents previously.
    3507             :          */
    3508       12121 :         while (!done) {
    3509             :                 u64 offset, len;
    3510             :                 u64 extent_end;
    3511             : 
    3512       23672 :                 if (i >= btrfs_header_nritems(src_path->nodes[0])) {
    3513           0 :                         ret = btrfs_next_leaf(BTRFS_I(inode)->root, src_path);
    3514           0 :                         if (ret < 0)
    3515             :                                 return ret;
    3516             :                         ASSERT(ret == 0);
    3517           0 :                         src = src_path->nodes[0];
    3518             :                         i = 0;
    3519             :                 }
    3520             : 
    3521       11836 :                 btrfs_item_key_to_cpu(src, &key, i);
    3522       11836 :                 if (!btrfs_comp_cpu_keys(&key, &last_key))
    3523             :                         done = true;
    3524       35508 :                 if (key.objectid != btrfs_ino(inode) ||
    3525       11836 :                     key.type != BTRFS_EXTENT_DATA_KEY) {
    3526         465 :                         i++;
    3527         465 :                         continue;
    3528             :                 }
    3529       11371 :                 extent = btrfs_item_ptr(src, i, struct btrfs_file_extent_item);
    3530       11371 :                 if (btrfs_file_extent_type(src, extent) ==
    3531             :                     BTRFS_FILE_EXTENT_INLINE) {
    3532          14 :                         len = btrfs_file_extent_inline_len(src, i, extent);
    3533          14 :                         extent_end = ALIGN(key.offset + len, log->sectorsize);
    3534             :                 } else {
    3535             :                         len = btrfs_file_extent_num_bytes(src, extent);
    3536       11357 :                         extent_end = key.offset + len;
    3537             :                 }
    3538       11371 :                 i++;
    3539             : 
    3540       11371 :                 if (*last_extent == key.offset) {
    3541       11371 :                         *last_extent = extent_end;
    3542       11371 :                         continue;
    3543             :                 }
    3544             :                 offset = *last_extent;
    3545           0 :                 len = key.offset - *last_extent;
    3546           0 :                 ret = btrfs_insert_file_extent(trans, log, btrfs_ino(inode),
    3547             :                                                offset, 0, 0, len, 0, len, 0,
    3548             :                                                0, 0);
    3549           0 :                 if (ret)
    3550             :                         break;
    3551           0 :                 *last_extent = extent_end;
    3552             :         }
    3553             :         /*
    3554             :          * Need to let the callers know we dropped the path so they should
    3555             :          * re-search.
    3556             :          */
    3557         285 :         if (!ret && need_find_last_extent)
    3558             :                 ret = 1;
    3559         285 :         return ret;
    3560             : }
    3561             : 
    3562        1322 : static int extent_cmp(void *priv, struct list_head *a, struct list_head *b)
    3563             : {
    3564             :         struct extent_map *em1, *em2;
    3565             : 
    3566             :         em1 = list_entry(a, struct extent_map, list);
    3567             :         em2 = list_entry(b, struct extent_map, list);
    3568             : 
    3569        1322 :         if (em1->start < em2->start)
    3570             :                 return -1;
    3571         160 :         else if (em1->start > em2->start)
    3572             :                 return 1;
    3573           0 :         return 0;
    3574             : }
    3575             : 
    3576        2403 : static int log_one_extent(struct btrfs_trans_handle *trans,
    3577             :                           struct inode *inode, struct btrfs_root *root,
    3578             :                           struct extent_map *em, struct btrfs_path *path,
    3579             :                           struct list_head *logged_list)
    3580             : {
    3581        2403 :         struct btrfs_root *log = root->log_root;
    3582             :         struct btrfs_file_extent_item *fi;
    3583             :         struct extent_buffer *leaf;
    3584             :         struct btrfs_ordered_extent *ordered;
    3585             :         struct list_head ordered_sums;
    3586             :         struct btrfs_map_token token;
    3587             :         struct btrfs_key key;
    3588        2403 :         u64 mod_start = em->mod_start;
    3589        2403 :         u64 mod_len = em->mod_len;
    3590             :         u64 csum_offset;
    3591             :         u64 csum_len;
    3592        2403 :         u64 extent_offset = em->start - em->orig_start;
    3593             :         u64 block_len;
    3594             :         int ret;
    3595        2403 :         bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
    3596        2403 :         int extent_inserted = 0;
    3597             : 
    3598             :         INIT_LIST_HEAD(&ordered_sums);
    3599             :         btrfs_init_map_token(&token);
    3600             : 
    3601        2403 :         ret = __btrfs_drop_extents(trans, log, inode, path, em->start,
    3602        2403 :                                    em->start + em->len, NULL, 0, 1,
    3603             :                                    sizeof(*fi), &extent_inserted);
    3604        2403 :         if (ret)
    3605             :                 return ret;
    3606             : 
    3607        2403 :         if (!extent_inserted) {
    3608          54 :                 key.objectid = btrfs_ino(inode);
    3609          54 :                 key.type = BTRFS_EXTENT_DATA_KEY;
    3610          54 :                 key.offset = em->start;
    3611             : 
    3612             :                 ret = btrfs_insert_empty_item(trans, log, path, &key,
    3613             :                                               sizeof(*fi));
    3614          54 :                 if (ret)
    3615             :                         return ret;
    3616             :         }
    3617        2403 :         leaf = path->nodes[0];
    3618        4806 :         fi = btrfs_item_ptr(leaf, path->slots[0],
    3619             :                             struct btrfs_file_extent_item);
    3620             : 
    3621        2403 :         btrfs_set_token_file_extent_generation(leaf, fi, em->generation,
    3622             :                                                &token);
    3623        2403 :         if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
    3624             :                 skip_csum = true;
    3625             :                 btrfs_set_token_file_extent_type(leaf, fi,
    3626             :                                                  BTRFS_FILE_EXTENT_PREALLOC,
    3627             :                                                  &token);
    3628             :         } else {
    3629             :                 btrfs_set_token_file_extent_type(leaf, fi,
    3630             :                                                  BTRFS_FILE_EXTENT_REG,
    3631             :                                                  &token);
    3632        2369 :                 if (em->block_start == EXTENT_MAP_HOLE)
    3633             :                         skip_csum = true;
    3634             :         }
    3635             : 
    3636        2403 :         block_len = max(em->block_len, em->orig_block_len);
    3637        2403 :         if (em->compress_type != BTRFS_COMPRESS_NONE) {
    3638           3 :                 btrfs_set_token_file_extent_disk_bytenr(leaf, fi,
    3639             :                                                         em->block_start,
    3640             :                                                         &token);
    3641             :                 btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, block_len,
    3642             :                                                            &token);
    3643        2400 :         } else if (em->block_start < EXTENT_MAP_LAST_BYTE) {
    3644        2323 :                 btrfs_set_token_file_extent_disk_bytenr(leaf, fi,
    3645             :                                                         em->block_start -
    3646             :                                                         extent_offset, &token);
    3647             :                 btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, block_len,
    3648             :                                                            &token);
    3649             :         } else {
    3650             :                 btrfs_set_token_file_extent_disk_bytenr(leaf, fi, 0, &token);
    3651             :                 btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, 0,
    3652             :                                                            &token);
    3653             :         }
    3654             : 
    3655        2403 :         btrfs_set_token_file_extent_offset(leaf, fi,
    3656        2403 :                                            em->start - em->orig_start,
    3657             :                                            &token);
    3658        2403 :         btrfs_set_token_file_extent_num_bytes(leaf, fi, em->len, &token);
    3659        2403 :         btrfs_set_token_file_extent_ram_bytes(leaf, fi, em->ram_bytes, &token);
    3660        2403 :         btrfs_set_token_file_extent_compression(leaf, fi, em->compress_type,
    3661             :                                                 &token);
    3662             :         btrfs_set_token_file_extent_encryption(leaf, fi, 0, &token);
    3663             :         btrfs_set_token_file_extent_other_encoding(leaf, fi, 0, &token);
    3664        2403 :         btrfs_mark_buffer_dirty(leaf);
    3665             : 
    3666        2403 :         btrfs_release_path(path);
    3667        2403 :         if (ret) {
    3668             :                 return ret;
    3669             :         }
    3670             : 
    3671        2403 :         if (skip_csum)
    3672             :                 return 0;
    3673             : 
    3674             :         /*
    3675             :          * First check and see if our csums are on our outstanding ordered
    3676             :          * extents.
    3677             :          */
    3678        4595 :         list_for_each_entry(ordered, logged_list, log_list) {
    3679             :                 struct btrfs_ordered_sum *sum;
    3680             : 
    3681        2327 :                 if (!mod_len)
    3682             :                         break;
    3683             : 
    3684        3508 :                 if (ordered->file_offset + ordered->len <= mod_start ||
    3685        1205 :                     mod_start + mod_len <= ordered->file_offset)
    3686        1128 :                         continue;
    3687             : 
    3688             :                 /*
    3689             :                  * We are going to copy all the csums on this ordered extent, so
    3690             :                  * go ahead and adjust mod_start and mod_len in case this
    3691             :                  * ordered extent has already been logged.
    3692             :                  */
    3693        1175 :                 if (ordered->file_offset > mod_start) {
    3694           0 :                         if (ordered->file_offset + ordered->len >=
    3695             :                             mod_start + mod_len)
    3696           0 :                                 mod_len = ordered->file_offset - mod_start;
    3697             :                         /*
    3698             :                          * If we have this case
    3699             :                          *
    3700             :                          * |--------- logged extent ---------|
    3701             :                          *       |----- ordered extent ----|
    3702             :                          *
    3703             :                          * Just don't mess with mod_start and mod_len, we'll
    3704             :                          * just end up logging more csums than we need and it
    3705             :                          * will be ok.
    3706             :                          */
    3707             :                 } else {
    3708        1175 :                         if (ordered->file_offset + ordered->len <
    3709             :                             mod_start + mod_len) {
    3710           0 :                                 mod_len = (mod_start + mod_len) -
    3711             :                                         (ordered->file_offset + ordered->len);
    3712             :                                 mod_start = ordered->file_offset +
    3713             :                                         ordered->len;
    3714             :                         } else {
    3715             :                                 mod_len = 0;
    3716             :                         }
    3717             :                 }
    3718             : 
    3719             :                 /*
    3720             :                  * To keep us from looping for the above case of an ordered
    3721             :                  * extent that falls inside of the logged extent.
    3722             :                  */
    3723        1175 :                 if (test_and_set_bit(BTRFS_ORDERED_LOGGED_CSUM,
    3724        1175 :                                      &ordered->flags))
    3725           0 :                         continue;
    3726             : 
    3727        1175 :                 if (ordered->csum_bytes_left) {
    3728           0 :                         btrfs_start_ordered_extent(inode, ordered, 0);
    3729           0 :                         wait_event(ordered->wait,
    3730             :                                    ordered->csum_bytes_left == 0);
    3731             :                 }
    3732             : 
    3733        2350 :                 list_for_each_entry(sum, &ordered->list, list) {
    3734        1175 :                         ret = btrfs_csum_file_blocks(trans, log, sum);
    3735        1175 :                         if (ret)
    3736             :                                 goto unlocked;
    3737             :                 }
    3738             : 
    3739             :         }
    3740             : unlocked:
    3741             : 
    3742        2292 :         if (!mod_len || ret)
    3743             :                 return ret;
    3744             : 
    3745        1117 :         if (em->compress_type) {
    3746             :                 csum_offset = 0;
    3747             :                 csum_len = block_len;
    3748             :         } else {
    3749        1114 :                 csum_offset = mod_start - em->start;
    3750             :                 csum_len = mod_len;
    3751             :         }
    3752             : 
    3753             :         /* block start is already adjusted for the file extent offset. */
    3754        1117 :         ret = btrfs_lookup_csums_range(log->fs_info->csum_root,
    3755             :                                        em->block_start + csum_offset,
    3756        1117 :                                        em->block_start + csum_offset +
    3757             :                                        csum_len - 1, &ordered_sums, 0);
    3758        1117 :         if (ret)
    3759             :                 return ret;
    3760             : 
    3761        2234 :         while (!list_empty(&ordered_sums)) {
    3762        1117 :                 struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next,
    3763             :                                                    struct btrfs_ordered_sum,
    3764             :                                                    list);
    3765        1117 :                 if (!ret)
    3766        1117 :                         ret = btrfs_csum_file_blocks(trans, log, sums);
    3767        1117 :                 list_del(&sums->list);
    3768        1117 :                 kfree(sums);
    3769             :         }
    3770             : 
    3771             :         return ret;
    3772             : }
    3773             : 
    3774        1195 : static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
    3775        2403 :                                      struct btrfs_root *root,
    3776             :                                      struct inode *inode,
    3777             :                                      struct btrfs_path *path,
    3778             :                                      struct list_head *logged_list)
    3779             : {
    3780             :         struct extent_map *em, *n;
    3781             :         struct list_head extents;
    3782        1195 :         struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree;
    3783             :         u64 test_gen;
    3784             :         int ret = 0;
    3785             :         int num = 0;
    3786             : 
    3787             :         INIT_LIST_HEAD(&extents);
    3788             : 
    3789        1195 :         write_lock(&tree->lock);
    3790        1195 :         test_gen = root->fs_info->last_trans_committed;
    3791             : 
    3792        4270 :         list_for_each_entry_safe(em, n, &tree->modified_extents, list) {
    3793             :                 list_del_init(&em->list);
    3794             : 
    3795             :                 /*
    3796             :                  * Just an arbitrary number, this can be really CPU intensive
    3797             :                  * once we start getting a lot of extents, and really once we
    3798             :                  * have a bunch of extents we just want to commit since it will
    3799             :                  * be faster.
    3800             :                  */
    3801        3075 :                 if (++num > 32768) {
    3802             :                         list_del_init(&tree->modified_extents);
    3803             :                         ret = -EFBIG;
    3804           0 :                         goto process;
    3805             :                 }
    3806             : 
    3807        3075 :                 if (em->generation <= test_gen)
    3808         672 :                         continue;
    3809             :                 /* Need a ref to keep it from getting evicted from cache */
    3810        2403 :                 atomic_inc(&em->refs);
    3811             :                 set_bit(EXTENT_FLAG_LOGGING, &em->flags);
    3812             :                 list_add_tail(&em->list, &extents);
    3813        2403 :                 num++;
    3814             :         }
    3815             : 
    3816        1195 :         list_sort(NULL, &extents, extent_cmp);
    3817             : 
    3818             : process:
    3819        3598 :         while (!list_empty(&extents)) {
    3820        2403 :                 em = list_entry(extents.next, struct extent_map, list);
    3821             : 
    3822        2403 :                 list_del_init(&em->list);
    3823             : 
    3824             :                 /*
    3825             :                  * If we had an error we just need to delete everybody from our
    3826             :                  * private list.
    3827             :                  */
    3828        2403 :                 if (ret) {
    3829           0 :                         clear_em_logging(tree, em);
    3830           0 :                         free_extent_map(em);
    3831           0 :                         continue;
    3832             :                 }
    3833             : 
    3834             :                 write_unlock(&tree->lock);
    3835             : 
    3836        2403 :                 ret = log_one_extent(trans, inode, root, em, path, logged_list);
    3837        2403 :                 write_lock(&tree->lock);
    3838        2403 :                 clear_em_logging(tree, em);
    3839        2403 :                 free_extent_map(em);
    3840             :         }
    3841        1195 :         WARN_ON(!list_empty(&extents));
    3842             :         write_unlock(&tree->lock);
    3843             : 
    3844        1195 :         btrfs_release_path(path);
    3845        1195 :         return ret;
    3846             : }
    3847             : 
    3848             : /* log a single inode in the tree log.
    3849             :  * At least one parent directory for this inode must exist in the tree
    3850             :  * or be logged already.
    3851             :  *
    3852             :  * Any items from this inode changed by the current transaction are copied
    3853             :  * to the log tree.  An extra reference is taken on any extents in this
    3854             :  * file, allowing us to avoid a whole pile of corner cases around logging
    3855             :  * blocks that have been removed from the tree.
    3856             :  *
    3857             :  * See LOG_INODE_ALL and related defines for a description of what inode_only
    3858             :  * does.
    3859             :  *
    3860             :  * This handles both files and directories.
    3861             :  */
    3862        1824 : static int btrfs_log_inode(struct btrfs_trans_handle *trans,
    3863             :                            struct btrfs_root *root, struct inode *inode,
    3864             :                            int inode_only,
    3865             :                            const loff_t start,
    3866             :                            const loff_t end)
    3867             : {
    3868             :         struct btrfs_path *path;
    3869             :         struct btrfs_path *dst_path;
    3870             :         struct btrfs_key min_key;
    3871             :         struct btrfs_key max_key;
    3872        1824 :         struct btrfs_root *log = root->log_root;
    3873             :         struct extent_buffer *src = NULL;
    3874        1824 :         LIST_HEAD(logged_list);
    3875        1824 :         u64 last_extent = 0;
    3876             :         int err = 0;
    3877             :         int ret;
    3878             :         int nritems;
    3879             :         int ins_start_slot = 0;
    3880             :         int ins_nr;
    3881             :         bool fast_search = false;
    3882             :         u64 ino = btrfs_ino(inode);
    3883             :         struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
    3884             : 
    3885        1824 :         path = btrfs_alloc_path();
    3886        1824 :         if (!path)
    3887             :                 return -ENOMEM;
    3888        1824 :         dst_path = btrfs_alloc_path();
    3889        1824 :         if (!dst_path) {
    3890           0 :                 btrfs_free_path(path);
    3891           0 :                 return -ENOMEM;
    3892             :         }
    3893             : 
    3894        1824 :         min_key.objectid = ino;
    3895        1824 :         min_key.type = BTRFS_INODE_ITEM_KEY;
    3896        1824 :         min_key.offset = 0;
    3897             : 
    3898             :         max_key.objectid = ino;
    3899             : 
    3900             : 
    3901             :         /* today the code can only do partial logging of directories */
    3902        3388 :         if (S_ISDIR(inode->i_mode) ||
    3903             :             (!test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
    3904        1236 :                        &BTRFS_I(inode)->runtime_flags) &&
    3905             :              inode_only == LOG_INODE_EXISTS))
    3906             :                 max_key.type = BTRFS_XATTR_ITEM_KEY;
    3907             :         else
    3908             :                 max_key.type = (u8)-1;
    3909             :         max_key.offset = (u64)-1;
    3910             : 
    3911             :         /* Only run delayed items if we are a dir or a new file */
    3912        3388 :         if (S_ISDIR(inode->i_mode) ||
    3913        1564 :             BTRFS_I(inode)->generation > root->fs_info->last_trans_committed) {
    3914        1527 :                 ret = btrfs_commit_inode_delayed_items(trans, inode);
    3915        1527 :                 if (ret) {
    3916           0 :                         btrfs_free_path(path);
    3917           0 :                         btrfs_free_path(dst_path);
    3918           0 :                         return ret;
    3919             :                 }
    3920             :         }
    3921             : 
    3922        1824 :         mutex_lock(&BTRFS_I(inode)->log_mutex);
    3923             : 
    3924        1824 :         btrfs_get_logged_extents(inode, &logged_list);
    3925             : 
    3926             :         /*
    3927             :          * a brute force approach to making sure we get the most up-to-date
    3928             :          * copies of everything.
    3929             :          */
    3930        1824 :         if (S_ISDIR(inode->i_mode)) {
    3931             :                 int max_key_type = BTRFS_DIR_LOG_INDEX_KEY;
    3932             : 
    3933         260 :                 if (inode_only == LOG_INODE_EXISTS)
    3934             :                         max_key_type = BTRFS_XATTR_ITEM_KEY;
    3935         260 :                 ret = drop_objectid_items(trans, log, path, ino, max_key_type);
    3936             :         } else {
    3937        1564 :                 if (test_and_clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
    3938        1564 :                                        &BTRFS_I(inode)->runtime_flags)) {
    3939             :                         clear_bit(BTRFS_INODE_COPY_EVERYTHING,
    3940             :                                   &BTRFS_I(inode)->runtime_flags);
    3941         328 :                         ret = btrfs_truncate_inode_items(trans, log,
    3942             :                                                          inode, 0, 0);
    3943        1236 :                 } else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING,
    3944        1211 :                                               &BTRFS_I(inode)->runtime_flags) ||
    3945             :                            inode_only == LOG_INODE_EXISTS) {
    3946          53 :                         if (inode_only == LOG_INODE_ALL)
    3947             :                                 fast_search = true;
    3948             :                         max_key.type = BTRFS_XATTR_ITEM_KEY;
    3949          53 :                         ret = drop_objectid_items(trans, log, path, ino,
    3950             :                                                   max_key.type);
    3951             :                 } else {
    3952        1183 :                         if (inode_only == LOG_INODE_ALL)
    3953             :                                 fast_search = true;
    3954        1183 :                         ret = log_inode_item(trans, log, dst_path, inode);
    3955        1183 :                         if (ret) {
    3956             :                                 err = ret;
    3957             :                                 goto out_unlock;
    3958             :                         }
    3959             :                         goto log_extents;
    3960             :                 }
    3961             : 
    3962             :         }
    3963         641 :         if (ret) {
    3964             :                 err = ret;
    3965             :                 goto out_unlock;
    3966             :         }
    3967         641 :         path->keep_locks = 1;
    3968             : 
    3969             :         while (1) {
    3970             :                 ins_nr = 0;
    3971         783 :                 ret = btrfs_search_forward(root, &min_key,
    3972             :                                            path, trans->transid);
    3973         783 :                 if (ret != 0)
    3974             :                         break;
    3975             : again:
    3976             :                 /* note, ins_nr might be > 0 here, cleanup outside the loop */
    3977       13306 :                 if (min_key.objectid != ino)
    3978             :                         break;
    3979       13020 :                 if (min_key.type > max_key.type)
    3980             :                         break;
    3981             : 
    3982             :                 src = path->nodes[0];
    3983       12721 :                 if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) {
    3984       12011 :                         ins_nr++;
    3985       12011 :                         goto next_slot;
    3986         710 :                 } else if (!ins_nr) {
    3987         710 :                         ins_start_slot = path->slots[0];
    3988             :                         ins_nr = 1;
    3989         710 :                         goto next_slot;
    3990             :                 }
    3991             : 
    3992           0 :                 ret = copy_items(trans, inode, dst_path, path, &last_extent,
    3993             :                                  ins_start_slot, ins_nr, inode_only);
    3994           0 :                 if (ret < 0) {
    3995             :                         err = ret;
    3996             :                         goto out_unlock;
    3997           0 :                 } if (ret) {
    3998             :                         ins_nr = 0;
    3999           0 :                         btrfs_release_path(path);
    4000           0 :                         continue;
    4001             :                 }
    4002             :                 ins_nr = 1;
    4003           0 :                 ins_start_slot = path->slots[0];
    4004             : next_slot:
    4005             : 
    4006       25442 :                 nritems = btrfs_header_nritems(path->nodes[0]);
    4007       12721 :                 path->slots[0]++;
    4008       12721 :                 if (path->slots[0] < nritems) {
    4009       12579 :                         btrfs_item_key_to_cpu(path->nodes[0], &min_key,
    4010             :                                               path->slots[0]);
    4011       12579 :                         goto again;
    4012             :                 }
    4013         142 :                 if (ins_nr) {
    4014         142 :                         ret = copy_items(trans, inode, dst_path, path,
    4015             :                                          &last_extent, ins_start_slot,
    4016             :                                          ins_nr, inode_only);
    4017         142 :                         if (ret < 0) {
    4018             :                                 err = ret;
    4019             :                                 goto out_unlock;
    4020             :                         }
    4021             :                         ret = 0;
    4022             :                         ins_nr = 0;
    4023             :                 }
    4024         142 :                 btrfs_release_path(path);
    4025             : 
    4026         142 :                 if (min_key.offset < (u64)-1) {
    4027         142 :                         min_key.offset++;
    4028           0 :                 } else if (min_key.type < max_key.type) {
    4029           0 :                         min_key.type++;
    4030           0 :                         min_key.offset = 0;
    4031             :                 } else {
    4032             :                         break;
    4033             :                 }
    4034             :         }
    4035         641 :         if (ins_nr) {
    4036         568 :                 ret = copy_items(trans, inode, dst_path, path, &last_extent,
    4037             :                                  ins_start_slot, ins_nr, inode_only);
    4038         568 :                 if (ret < 0) {
    4039             :                         err = ret;
    4040             :                         goto out_unlock;
    4041             :                 }
    4042             :                 ret = 0;
    4043             :                 ins_nr = 0;
    4044             :         }
    4045             : 
    4046             : log_extents:
    4047        1824 :         btrfs_release_path(path);
    4048        1824 :         btrfs_release_path(dst_path);
    4049        1824 :         if (fast_search) {
    4050        1195 :                 ret = btrfs_log_changed_extents(trans, root, inode, dst_path,
    4051             :                                                 &logged_list);
    4052        1195 :                 if (ret) {
    4053             :                         err = ret;
    4054             :                         goto out_unlock;
    4055             :                 }
    4056         629 :         } else if (inode_only == LOG_INODE_ALL) {
    4057             :                 struct extent_map *em, *n;
    4058             : 
    4059         288 :                 write_lock(&em_tree->lock);
    4060             :                 /*
    4061             :                  * We can't just remove every em if we're called for a ranged
    4062             :                  * fsync - that is, one that doesn't cover the whole possible
    4063             :                  * file range (0 to LLONG_MAX). This is because we can have
    4064             :                  * em's that fall outside the range we're logging and therefore
    4065             :                  * their ordered operations haven't completed yet
    4066             :                  * (btrfs_finish_ordered_io() not invoked yet). This means we
    4067             :                  * didn't get their respective file extent item in the fs/subvol
    4068             :                  * tree yet, and need to let the next fast fsync (one which
    4069             :                  * consults the list of modified extent maps) find the em so
    4070             :                  * that it logs a matching file extent item and waits for the
    4071             :                  * respective ordered operation to complete (if it's still
    4072             :                  * running).
    4073             :                  *
    4074             :                  * Removing every em outside the range we're logging would make
    4075             :                  * the next fast fsync not log their matching file extent items,
    4076             :                  * therefore making us lose data after a log replay.
    4077             :                  */
    4078        3038 :                 list_for_each_entry_safe(em, n, &em_tree->modified_extents,
    4079             :                                          list) {
    4080        2750 :                         const u64 mod_end = em->mod_start + em->mod_len - 1;
    4081             : 
    4082        2750 :                         if (em->mod_start >= start && mod_end <= end)
    4083             :                                 list_del_init(&em->list);
    4084             :                 }
    4085             :                 write_unlock(&em_tree->lock);
    4086             :         }
    4087             : 
    4088        1824 :         if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) {
    4089           0 :                 ret = log_directory_changes(trans, root, inode, path, dst_path);
    4090           0 :                 if (ret) {
    4091             :                         err = ret;
    4092             :                         goto out_unlock;
    4093             :                 }
    4094             :         }
    4095             : 
    4096        1824 :         BTRFS_I(inode)->logged_trans = trans->transid;
    4097        1824 :         BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans;
    4098             : out_unlock:
    4099        1824 :         if (unlikely(err))
    4100           0 :                 btrfs_put_logged_extents(&logged_list);
    4101             :         else
    4102        1824 :                 btrfs_submit_logged_extents(&logged_list, log);
    4103        1824 :         mutex_unlock(&BTRFS_I(inode)->log_mutex);
    4104             : 
    4105        1824 :         btrfs_free_path(path);
    4106        1824 :         btrfs_free_path(dst_path);
    4107        1824 :         return err;
    4108             : }
    4109             : 
    4110             : /*
    4111             :  * follow the dentry parent pointers up the chain and see if any
    4112             :  * of the directories in it require a full commit before they can
    4113             :  * be logged.  Returns zero if nothing special needs to be done or 1 if
    4114             :  * a full commit is required.
    4115             :  */
    4116        1727 : static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
    4117             :                                                struct inode *inode,
    4118             :                                                struct dentry *parent,
    4119             :                                                struct super_block *sb,
    4120             :                                                u64 last_committed)
    4121             : {
    4122             :         int ret = 0;
    4123             :         struct btrfs_root *root;
    4124             :         struct dentry *old_parent = NULL;
    4125             :         struct inode *orig_inode = inode;
    4126             : 
    4127             :         /*
    4128             :          * for a regular file, if its inode is already on disk, we don't
    4129             :          * have to worry about the parents at all.  This is because
    4130             :          * we can use the last_unlink_trans field to record renames
    4131             :          * and other fun in this file.
    4132             :          */
    4133        3257 :         if (S_ISREG(inode->i_mode) &&
    4134        1911 :             BTRFS_I(inode)->generation <= last_committed &&
    4135         305 :             BTRFS_I(inode)->last_unlink_trans <= last_committed)
    4136             :                         goto out;
    4137             : 
    4138        1427 :         if (!S_ISDIR(inode->i_mode)) {
    4139        1413 :                 if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)
    4140             :                         goto out;
    4141             :                 inode = parent->d_inode;
    4142             :         }
    4143             : 
    4144             :         while (1) {
    4145             :                 /*
    4146             :                  * If we are logging a directory then we start with our inode,
    4147             :                  * not our parent's inode, so we need to skip setting the
    4148             :                  * logged_trans so that further down in the log code we don't
    4149             :                  * think this inode has already been logged.
    4150             :                  */
    4151        2680 :                 if (inode != orig_inode)
    4152        2666 :                         BTRFS_I(inode)->logged_trans = trans->transid;
    4153        2680 :                 smp_mb();
    4154             : 
    4155        2680 :                 if (BTRFS_I(inode)->last_unlink_trans > last_committed) {
    4156          76 :                         root = BTRFS_I(inode)->root;
    4157             : 
    4158             :                         /*
    4159             :                          * make sure any commits to the log are forced
    4160             :                          * to be full commits
    4161             :                          */
    4162          76 :                         btrfs_set_log_full_commit(root->fs_info, trans);
    4163             :                         ret = 1;
    4164          76 :                         break;
    4165             :                 }
    4166             : 
    4167        2604 :                 if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)
    4168             :                         break;
    4169             : 
    4170        2604 :                 if (IS_ROOT(parent))
    4171             :                         break;
    4172             : 
    4173        1253 :                 parent = dget_parent(parent);
    4174        1253 :                 dput(old_parent);
    4175             :                 old_parent = parent;
    4176        1253 :                 inode = parent->d_inode;
    4177             : 
    4178        1253 :         }
    4179        1427 :         dput(old_parent);
    4180             : out:
    4181        1651 :         return ret;
    4182             : }
    4183             : 
    4184             : /*
    4185             :  * helper function around btrfs_log_inode to make sure newly created
    4186             :  * parent directories also end up in the log.  Only a minimal inode and
    4187             :  * backref logging is done for any parent directories that are older than
    4188             :  * the last committed transaction.
    4189             :  */
    4190        1682 : static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
    4191             :                                   struct btrfs_root *root, struct inode *inode,
    4192             :                                   struct dentry *parent,
    4193             :                                   const loff_t start,
    4194             :                                   const loff_t end,
    4195             :                                   int exists_only,
    4196             :                                   struct btrfs_log_ctx *ctx)
    4197             : {
    4198        1682 :         int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL;
    4199             :         struct super_block *sb;
    4200             :         struct dentry *old_parent = NULL;
    4201             :         int ret = 0;
    4202        1682 :         u64 last_committed = root->fs_info->last_trans_committed;
    4203             : 
    4204        1682 :         sb = inode->i_sb;
    4205             : 
    4206        1682 :         if (btrfs_test_opt(root, NOTREELOG)) {
    4207             :                 ret = 1;
    4208             :                 goto end_no_trans;
    4209             :         }
    4210             : 
    4211             :         /*
    4212             :          * The prev transaction commit doesn't complete, we need do
    4213             :          * full commit by ourselves.
    4214             :          */
    4215        1682 :         if (root->fs_info->last_trans_log_full_commit >
    4216             :             root->fs_info->last_trans_committed) {
    4217             :                 ret = 1;
    4218             :                 goto end_no_trans;
    4219             :         }
    4220             : 
    4221        3302 :         if (root != BTRFS_I(inode)->root ||
    4222             :             btrfs_root_refs(&root->root_item) == 0) {
    4223             :                 ret = 1;
    4224             :                 goto end_no_trans;
    4225             :         }
    4226             : 
    4227        1651 :         ret = check_parent_dirs_for_sync(trans, inode, parent,
    4228             :                                          sb, last_committed);
    4229        1651 :         if (ret)
    4230             :                 goto end_no_trans;
    4231             : 
    4232        3150 :         if (btrfs_inode_in_log(inode, trans->transid)) {
    4233             :                 ret = BTRFS_NO_LOG_SYNC;
    4234             :                 goto end_no_trans;
    4235             :         }
    4236             : 
    4237        1575 :         ret = start_log_trans(trans, root, ctx);
    4238        1575 :         if (ret)
    4239             :                 goto end_no_trans;
    4240             : 
    4241        1575 :         ret = btrfs_log_inode(trans, root, inode, inode_only, start, end);
    4242        1575 :         if (ret)
    4243             :                 goto end_trans;
    4244             : 
    4245             :         /*
    4246             :          * for regular files, if its inode is already on disk, we don't
    4247             :          * have to worry about the parents at all.  This is because
    4248             :          * we can use the last_unlink_trans field to record renames
    4249             :          * and other fun in this file.
    4250             :          */
    4251        3111 :         if (S_ISREG(inode->i_mode) &&
    4252        1814 :             BTRFS_I(inode)->generation <= last_committed &&
    4253         278 :             BTRFS_I(inode)->last_unlink_trans <= last_committed) {
    4254             :                 ret = 0;
    4255             :                 goto end_trans;
    4256             :         }
    4257             : 
    4258             :         inode_only = LOG_INODE_EXISTS;
    4259             :         while (1) {
    4260        2497 :                 if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)
    4261             :                         break;
    4262             : 
    4263             :                 inode = parent->d_inode;
    4264        2497 :                 if (root != BTRFS_I(inode)->root)
    4265             :                         break;
    4266             : 
    4267        4894 :                 if (BTRFS_I(inode)->generation >
    4268        2447 :                     root->fs_info->last_trans_committed) {
    4269         249 :                         ret = btrfs_log_inode(trans, root, inode, inode_only,
    4270             :                                               0, LLONG_MAX);
    4271         249 :                         if (ret)
    4272             :                                 goto end_trans;
    4273             :                 }
    4274        2447 :                 if (IS_ROOT(parent))
    4275             :                         break;
    4276             : 
    4277        1146 :                 parent = dget_parent(parent);
    4278        1146 :                 dput(old_parent);
    4279             :                 old_parent = parent;
    4280        1146 :         }
    4281             :         ret = 0;
    4282             : end_trans:
    4283        1575 :         dput(old_parent);
    4284        1575 :         if (ret < 0) {
    4285           0 :                 btrfs_set_log_full_commit(root->fs_info, trans);
    4286             :                 ret = 1;
    4287             :         }
    4288             : 
    4289        1575 :         if (ret)
    4290           0 :                 btrfs_remove_log_ctx(root, ctx);
    4291        1575 :         btrfs_end_log_trans(root);
    4292             : end_no_trans:
    4293        1682 :         return ret;
    4294             : }
    4295             : 
    4296             : /*
    4297             :  * It is not safe to log a dentry if the chunk root has added new chunks.
    4298             :  * This returns 0 if the dentry was logged, and 1 if you must commit the
    4299             :  * transaction to safely get your data on disk.  Other values returned by
    4300             :  * btrfs_log_inode_parent (e.g. BTRFS_NO_LOG_SYNC) are passed through as is.
    4301             :  */
    4302        1569 : int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
    4303             :                           struct btrfs_root *root, struct dentry *dentry,
    4304             :                           const loff_t start,
    4305             :                           const loff_t end,
    4306             :                           struct btrfs_log_ctx *ctx)
    4307             : {
    4308        1569 :         struct dentry *parent = dget_parent(dentry);
    4309             :         int ret;
    4310             : 
    4311        1569 :         ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent,
    4312             :                                      start, end, 0, ctx);
    4313        1569 :         dput(parent);
    4314             : 
    4315        1569 :         return ret;
    4316             : }
    4317             : 
    4318             : /*
    4319             :  * Should be called during mount to recover and replay any log trees from
    4320             :  * the FS: pin the log blocks, walk the replay stages, then commit.
    4321             :  */
    4322           0 : int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
    4323             : {
    4324             :         int ret;
    4325             :         struct btrfs_path *path;
    4326             :         struct btrfs_trans_handle *trans;
    4327             :         struct btrfs_key key;
    4328             :         struct btrfs_key found_key;
    4329             :         struct btrfs_key tmp_key;
    4330             :         struct btrfs_root *log;
    4331           0 :         struct btrfs_fs_info *fs_info = log_root_tree->fs_info;
    4332           0 :         struct walk_control wc = {
    4333             :                 .process_func = process_one_buffer,
    4334             :                 .stage = 0,
    4335             :         };
    4336             : 
    4337           0 :         path = btrfs_alloc_path();
    4338           0 :         if (!path)
    4339             :                 return -ENOMEM;
    4340             : 
    4341           0 :         fs_info->log_root_recovering = 1;
    4342             : 
    4343           0 :         trans = btrfs_start_transaction(fs_info->tree_root, 0);
    4344           0 :         if (IS_ERR(trans)) {
    4345           0 :                 ret = PTR_ERR(trans);
    4346           0 :                 goto error;
    4347             :         }
    4348             : 
    4349           0 :         wc.trans = trans;
    4350           0 :         wc.pin = 1;
    4351             : 
    4352           0 :         ret = walk_log_tree(trans, log_root_tree, &wc);
    4353           0 :         if (ret) {
    4354           0 :                 btrfs_error(fs_info, ret, "Failed to pin buffers while "
    4355             :                             "recovering log root tree.");
    4356           0 :                 goto error;
    4357             :         }
    4358             : 
    4359             : again:
    4360           0 :         key.objectid = BTRFS_TREE_LOG_OBJECTID;
    4361           0 :         key.offset = (u64)-1;
    4362             :         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
    4363             : 
    4364             :         while (1) {
    4365           0 :                 ret = btrfs_search_slot(NULL, log_root_tree, &key, path, 0, 0);
    4366             : 
    4367           0 :                 if (ret < 0) {
    4368           0 :                         btrfs_error(fs_info, ret,
    4369             :                                     "Couldn't find tree log root.");
    4370           0 :                         goto error;
    4371             :                 }
    4372           0 :                 if (ret > 0) {
    4373           0 :                         if (path->slots[0] == 0)
    4374             :                                 break;
    4375           0 :                         path->slots[0]--;
    4376             :                 }
    4377           0 :                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
    4378             :                                       path->slots[0]);
    4379           0 :                 btrfs_release_path(path);
    4380           0 :                 if (found_key.objectid != BTRFS_TREE_LOG_OBJECTID)
    4381             :                         break;
    4382             : 
    4383           0 :                 log = btrfs_read_fs_root(log_root_tree, &found_key);
    4384           0 :                 if (IS_ERR(log)) {
    4385           0 :                         ret = PTR_ERR(log);
    4386           0 :                         btrfs_error(fs_info, ret,
    4387             :                                     "Couldn't read tree log root.");
    4388           0 :                         goto error;
    4389             :                 }
    4390             : 
    4391           0 :                 tmp_key.objectid = found_key.offset;
    4392           0 :                 tmp_key.type = BTRFS_ROOT_ITEM_KEY;
    4393           0 :                 tmp_key.offset = (u64)-1;
    4394             : 
    4395           0 :                 wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key);
    4396           0 :                 if (IS_ERR(wc.replay_dest)) {
    4397           0 :                         ret = PTR_ERR(wc.replay_dest);
    4398           0 :                         free_extent_buffer(log->node);
    4399           0 :                         free_extent_buffer(log->commit_root);
    4400           0 :                         kfree(log);
    4401           0 :                         btrfs_error(fs_info, ret, "Couldn't read target root "
    4402             :                                     "for tree log recovery.");
    4403           0 :                         goto error;
    4404             :                 }
    4405             : 
    4406           0 :                 wc.replay_dest->log_root = log;
    4407           0 :                 btrfs_record_root_in_trans(trans, wc.replay_dest);
    4408           0 :                 ret = walk_log_tree(trans, log, &wc);
    4409             : 
    4410           0 :                 if (!ret && wc.stage == LOG_WALK_REPLAY_ALL) {
    4411           0 :                         ret = fixup_inode_link_counts(trans, wc.replay_dest,
    4412             :                                                       path);
    4413             :                 }
    4414             : 
    4415           0 :                 key.offset = found_key.offset - 1;
    4416           0 :                 wc.replay_dest->log_root = NULL;
    4417           0 :                 free_extent_buffer(log->node);
    4418           0 :                 free_extent_buffer(log->commit_root);
    4419           0 :                 kfree(log);
    4420             : 
    4421           0 :                 if (ret)
    4422             :                         goto error;
    4423             : 
    4424           0 :                 if (found_key.offset == 0)
    4425             :                         break;
    4426             :         }
    4427           0 :         btrfs_release_path(path);
    4428             : 
    4429             :         /* step one is to pin it all, step two is to replay just inodes */
    4430           0 :         if (wc.pin) {
    4431           0 :                 wc.pin = 0;
    4432           0 :                 wc.process_func = replay_one_buffer;
    4433           0 :                 wc.stage = LOG_WALK_REPLAY_INODES;
    4434           0 :                 goto again;
    4435             :         }
    4436             :         /* step three: advance the stage until LOG_WALK_REPLAY_ALL has run */
    4437           0 :         if (wc.stage < LOG_WALK_REPLAY_ALL) {
    4438           0 :                 wc.stage++;
    4439           0 :                 goto again;
    4440             :         }
    4441             : 
    4442           0 :         btrfs_free_path(path);
    4443             : 
    4444             :         /* step 4: commit the transaction, which also unpins the blocks */
    4445           0 :         ret = btrfs_commit_transaction(trans, fs_info->tree_root);
    4446           0 :         if (ret)
    4447             :                 return ret;
    4448             : 
    4449           0 :         free_extent_buffer(log_root_tree->node);
    4450           0 :         log_root_tree->log_root = NULL;
    4451           0 :         fs_info->log_root_recovering = 0;
    4452           0 :         kfree(log_root_tree);
    4453             : 
    4454           0 :         return 0;
    4455             : error:
    4456           0 :         if (wc.trans)
    4457           0 :                 btrfs_end_transaction(wc.trans, fs_info->tree_root);
    4458           0 :         btrfs_free_path(path);
    4459           0 :         return ret;
    4460             : }
    4461             : 
    4462             : /*
    4463             :  * There are some corner cases where we want to force a full
    4464             :  * commit instead of allowing a directory to be logged.
    4465             :  *
    4466             :  * They revolve around files that were unlinked from the directory, and
    4467             :  * this function updates the parent directory so that a full commit is
    4468             :  * properly done if it is fsync'd later after the unlinks are done.
    4469             :  */
    4470       10742 : void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
    4471             :                              struct inode *dir, struct inode *inode,
    4472             :                              int for_rename)
    4473             : {
    4474             :         /*
    4475             :          * When we're logging a file, if it hasn't been renamed
    4476             :          * or unlinked, and its inode is fully committed on disk,
    4477             :          * we don't have to worry about walking up the directory chain
    4478             :          * to log its parents.
    4479             :          *
    4480             :          * So, we use the last_unlink_trans field to put this transid
    4481             :          * into the file.  When the file is logged we check it and
    4482             :          * don't log the parents if the file is fully on disk.
    4483             :          */
    4484       10742 :         if (S_ISREG(inode->i_mode))
    4485        6303 :                 BTRFS_I(inode)->last_unlink_trans = trans->transid;
    4486             : 
    4487             :         /*
    4488             :          * If this directory was already logged, any new
    4489             :          * names for this file/dir will get recorded.
    4490             :          */
    4491       10742 :         smp_mb();
    4492       10742 :         if (BTRFS_I(dir)->logged_trans == trans->transid)
    4493             :                 return;
    4494             : 
    4495             :         /*
    4496             :          * If the inode we're about to unlink was logged,
    4497             :          * the log will be properly updated for any new names.
    4498             :          */
    4499       10617 :         if (BTRFS_I(inode)->logged_trans == trans->transid)
    4500             :                 return;
    4501             : 
    4502             :         /*
    4503             :          * When renaming files across directories, if the directory
    4504             :          * that we're unlinking from gets fsync'd later on, there's
    4505             :          * no way to find the destination directory later and fsync it
    4506             :          * properly.  So, we have to be conservative and force commits
    4507             :          * so the new name gets discovered.
    4508             :          */
    4509       10607 :         if (for_rename)
    4510             :                 goto record;
    4511             : 
    4512             :         /* We can safely do the unlink without any special recording. */
    4513             :         return;
    4514             : 
    4515             : record:
    4516        2042 :         BTRFS_I(dir)->last_unlink_trans = trans->transid;
    4517             : }
    4518             : 
    4519             : /*
    4520             :  * Call this after adding a new name for a file and it will properly
    4521             :  * update the log to reflect the new name.
    4522             :  *
    4523             :  * It returns zero when nothing needs to be logged; otherwise it returns
    4524             :  * the result of btrfs_log_inode_parent, called with exists_only set.
    4525             :  */
    4526        5884 : int btrfs_log_new_name(struct btrfs_trans_handle *trans,
    4527             :                         struct inode *inode, struct inode *old_dir,
    4528             :                         struct dentry *parent)
    4529             : {
    4530        5884 :         struct btrfs_root * root = BTRFS_I(inode)->root;
    4531             : 
    4532             :         /*
    4533             :          * This will force the logging code to walk the dentry chain
    4534             :          * up for the file.
    4535             :          */
    4536        5884 :         if (S_ISREG(inode->i_mode))
    4537        3538 :                 BTRFS_I(inode)->last_unlink_trans = trans->transid;
    4538             : 
    4539             :         /*
    4540             :          * If this inode hasn't been logged and the directory we're renaming
    4541             :          * it from hasn't been logged, we don't need to log it.
    4542             :          */
    4543       11768 :         if (BTRFS_I(inode)->logged_trans <=
    4544       11723 :             root->fs_info->last_trans_committed &&
    4545        2280 :             (!old_dir || BTRFS_I(old_dir)->logged_trans <=
    4546             :                     root->fs_info->last_trans_committed))
    4547             :                 return 0;
    4548             : 
    4549         113 :         return btrfs_log_inode_parent(trans, root, inode, parent, 0,
    4550             :                                       LLONG_MAX, 1, NULL);
    4551             : }
    4552             : 

Generated by: LCOV version 1.10