Line data Source code
1 : /*
2 : * Copyright (C) 2007 Oracle. All rights reserved.
3 : *
4 : * This program is free software; you can redistribute it and/or
5 : * modify it under the terms of the GNU General Public
6 : * License v2 as published by the Free Software Foundation.
7 : *
8 : * This program is distributed in the hope that it will be useful,
9 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 : * General Public License for more details.
12 : *
13 : * You should have received a copy of the GNU General Public
14 : * License along with this program; if not, write to the
15 : * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 : * Boston, MA 02111-1307, USA.
17 : */
18 :
19 : #include <linux/kernel.h>
20 : #include <linux/bio.h>
21 : #include <linux/buffer_head.h>
22 : #include <linux/file.h>
23 : #include <linux/fs.h>
24 : #include <linux/pagemap.h>
25 : #include <linux/highmem.h>
26 : #include <linux/time.h>
27 : #include <linux/init.h>
28 : #include <linux/string.h>
29 : #include <linux/backing-dev.h>
30 : #include <linux/mpage.h>
31 : #include <linux/swap.h>
32 : #include <linux/writeback.h>
33 : #include <linux/statfs.h>
34 : #include <linux/compat.h>
35 : #include <linux/aio.h>
36 : #include <linux/bit_spinlock.h>
37 : #include <linux/xattr.h>
38 : #include <linux/posix_acl.h>
39 : #include <linux/falloc.h>
40 : #include <linux/slab.h>
41 : #include <linux/ratelimit.h>
42 : #include <linux/mount.h>
43 : #include <linux/btrfs.h>
44 : #include <linux/blkdev.h>
45 : #include <linux/posix_acl_xattr.h>
46 : #include "ctree.h"
47 : #include "disk-io.h"
48 : #include "transaction.h"
49 : #include "btrfs_inode.h"
50 : #include "print-tree.h"
51 : #include "ordered-data.h"
52 : #include "xattr.h"
53 : #include "tree-log.h"
54 : #include "volumes.h"
55 : #include "compression.h"
56 : #include "locking.h"
57 : #include "free-space-cache.h"
58 : #include "inode-map.h"
59 : #include "backref.h"
60 : #include "hash.h"
61 : #include "props.h"
62 :
63 : struct btrfs_iget_args {
64 : struct btrfs_key *location;
65 : struct btrfs_root *root;
66 : };
67 :
68 : static const struct inode_operations btrfs_dir_inode_operations;
69 : static const struct inode_operations btrfs_symlink_inode_operations;
70 : static const struct inode_operations btrfs_dir_ro_inode_operations;
71 : static const struct inode_operations btrfs_special_inode_operations;
72 : static const struct inode_operations btrfs_file_inode_operations;
73 : static const struct address_space_operations btrfs_aops;
74 : static const struct address_space_operations btrfs_symlink_aops;
75 : static const struct file_operations btrfs_dir_file_operations;
76 : static struct extent_io_ops btrfs_extent_io_ops;
77 :
78 : static struct kmem_cache *btrfs_inode_cachep;
79 : static struct kmem_cache *btrfs_delalloc_work_cachep;
80 : struct kmem_cache *btrfs_trans_handle_cachep;
81 : struct kmem_cache *btrfs_transaction_cachep;
82 : struct kmem_cache *btrfs_path_cachep;
83 : struct kmem_cache *btrfs_free_space_cachep;
84 :
85 : #define S_SHIFT 12
86 : static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
87 : [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
88 : [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
89 : [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
90 : [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
91 : [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
92 : [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
93 : [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
94 : };
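/*
 * Illustrative use of the table above (the local variable is hypothetical,
 * not from this file):
 *
 *	u8 ftype = btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
 *
 * e.g. S_IFDIR is 0040000, and 0040000 >> 12 == 4 selects BTRFS_FT_DIR.
 */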
95 :
96 : static int btrfs_setsize(struct inode *inode, struct iattr *attr);
97 : static int btrfs_truncate(struct inode *inode);
98 : static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
99 : static noinline int cow_file_range(struct inode *inode,
100 : struct page *locked_page,
101 : u64 start, u64 end, int *page_started,
102 : unsigned long *nr_written, int unlock);
103 : static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
104 : u64 len, u64 orig_start,
105 : u64 block_start, u64 block_len,
106 : u64 orig_block_len, u64 ram_bytes,
107 : int type);
108 :
109 : static int btrfs_dirty_inode(struct inode *inode);
110 :
111 20423 : static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
112 : struct inode *inode, struct inode *dir,
113 : const struct qstr *qstr)
114 : {
115 : int err;
116 :
117 20423 : err = btrfs_init_acl(trans, inode, dir);
118 20423 : if (!err)
119 20423 : err = btrfs_xattr_security_init(trans, inode, dir, qstr);
120 20423 : return err;
121 : }
122 :
123 : /*
124 : * this does all the hard work for inserting an inline extent into
125 : * the btree. The caller should have done a btrfs_drop_extents so that
126 : * no overlapping inline items exist in the btree
127 : */
128 3851 : static int insert_inline_extent(struct btrfs_trans_handle *trans,
129 : struct btrfs_path *path, int extent_inserted,
130 : struct btrfs_root *root, struct inode *inode,
131 : u64 start, size_t size, size_t compressed_size,
132 : int compress_type,
133 : struct page **compressed_pages)
134 : {
135 : struct extent_buffer *leaf;
136 : struct page *page = NULL;
137 : char *kaddr;
138 : unsigned long ptr;
139 : struct btrfs_file_extent_item *ei;
140 : int err = 0;
141 : int ret;
142 : size_t cur_size = size;
143 : unsigned long offset;
144 :
145 3851 : if (compressed_size && compressed_pages)
146 : cur_size = compressed_size;
147 :
148 3851 : inode_add_bytes(inode, size);
149 :
150 3851 : if (!extent_inserted) {
151 : struct btrfs_key key;
152 : size_t datasize;
153 :
154 2586 : key.objectid = btrfs_ino(inode);
155 2586 : key.offset = start;
156 : btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
157 :
158 2586 : datasize = btrfs_file_extent_calc_inline_size(cur_size);
159 2586 : path->leave_spinning = 1;
160 : ret = btrfs_insert_empty_item(trans, root, path, &key,
161 : datasize);
162 2586 : if (ret) {
163 : err = ret;
164 0 : goto fail;
165 : }
166 : }
167 3851 : leaf = path->nodes[0];
168 7702 : ei = btrfs_item_ptr(leaf, path->slots[0],
169 : struct btrfs_file_extent_item);
170 3851 : btrfs_set_file_extent_generation(leaf, ei, trans->transid);
171 : btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE);
172 : btrfs_set_file_extent_encryption(leaf, ei, 0);
173 : btrfs_set_file_extent_other_encoding(leaf, ei, 0);
174 : btrfs_set_file_extent_ram_bytes(leaf, ei, size);
175 : ptr = btrfs_file_extent_inline_start(ei);
176 :
177 3851 : if (compress_type != BTRFS_COMPRESS_NONE) {
178 : struct page *cpage;
179 : int i = 0;
180 0 : while (compressed_size > 0) {
181 0 : cpage = compressed_pages[i];
182 0 : cur_size = min_t(unsigned long, compressed_size,
183 : PAGE_CACHE_SIZE);
184 :
185 : kaddr = kmap_atomic(cpage);
186 0 : write_extent_buffer(leaf, kaddr, ptr, cur_size);
187 : kunmap_atomic(kaddr);
188 :
189 0 : i++;
190 0 : ptr += cur_size;
191 0 : compressed_size -= cur_size;
192 : }
193 0 : btrfs_set_file_extent_compression(leaf, ei,
194 : compress_type);
195 : } else {
196 3851 : page = find_get_page(inode->i_mapping,
197 3851 : start >> PAGE_CACHE_SHIFT);
198 : btrfs_set_file_extent_compression(leaf, ei, 0);
199 : kaddr = kmap_atomic(page);
200 3851 : offset = start & (PAGE_CACHE_SIZE - 1);
201 3851 : write_extent_buffer(leaf, kaddr + offset, ptr, size);
202 : kunmap_atomic(kaddr);
203 3851 : page_cache_release(page);
204 : }
205 3851 : btrfs_mark_buffer_dirty(leaf);
206 3851 : btrfs_release_path(path);
207 :
208 : /*
209 : * we're an inline extent, so nobody can
210 : * extend the file past i_size without locking
211 : * a page we already have locked.
212 : *
213 : * We must do any isize and inode updates
214 : * before we unlock the pages. Otherwise we
215 : * could end up racing with unlink.
216 : */
217 3851 : BTRFS_I(inode)->disk_i_size = inode->i_size;
218 3851 : ret = btrfs_update_inode(trans, root, inode);
219 :
220 3851 : return ret;
221 : fail:
222 0 : return err;
223 : }
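/*
 * A sketch of what btrfs_file_extent_calc_inline_size() works out to,
 * assuming the definitions in ctree.h (the helper name here is
 * hypothetical): an inline item stores the file extent header up to the
 * disk_bytenr field, followed directly by the payload bytes.
 */
static inline u32 inline_item_size_sketch(u32 datasize)
{
	/* header bytes that precede the inline data, plus the payload */
	return offsetof(struct btrfs_file_extent_item, disk_bytenr) + datasize;
}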
224 :
225 :
226 : /*
227 : * conditionally insert an inline extent into the file. This
228 : * does the checks required to make sure the data is small enough
229 : * to fit as an inline extent.
230 : */
231 11214 : static noinline int cow_file_range_inline(struct btrfs_root *root,
232 11214 : struct inode *inode, u64 start,
233 : u64 end, size_t compressed_size,
234 : int compress_type,
235 : struct page **compressed_pages)
236 : {
237 : struct btrfs_trans_handle *trans;
238 11214 : u64 isize = i_size_read(inode);
239 11214 : u64 actual_end = min(end + 1, isize);
240 11214 : u64 inline_len = actual_end - start;
241 11214 : u64 aligned_end = ALIGN(end, root->sectorsize);
242 : u64 data_len = inline_len;
243 : int ret;
244 : struct btrfs_path *path;
245 11214 : int extent_inserted = 0;
246 : u32 extent_item_size;
247 :
248 11214 : if (compressed_size)
249 : data_len = compressed_size;
250 :
251 22428 : if (start > 0 ||
252 15065 : actual_end >= PAGE_CACHE_SIZE ||
253 7702 : data_len >= BTRFS_MAX_INLINE_DATA_SIZE(root) ||
254 3851 : (!compressed_size &&
255 7702 : (actual_end & (root->sectorsize - 1)) == 0) ||
256 3851 : end + 1 < isize ||
257 3851 : data_len > root->fs_info->max_inline) {
258 : return 1;
259 : }
260 :
261 3851 : path = btrfs_alloc_path();
262 3851 : if (!path)
263 : return -ENOMEM;
264 :
265 3851 : trans = btrfs_join_transaction(root);
266 3851 : if (IS_ERR(trans)) {
267 0 : btrfs_free_path(path);
268 0 : return PTR_ERR(trans);
269 : }
270 3851 : trans->block_rsv = &root->fs_info->delalloc_block_rsv;
271 :
272 3851 : if (compressed_size && compressed_pages)
273 0 : extent_item_size = btrfs_file_extent_calc_inline_size(
274 : compressed_size);
275 : else
276 3851 : extent_item_size = btrfs_file_extent_calc_inline_size(
277 : inline_len);
278 :
279 3851 : ret = __btrfs_drop_extents(trans, root, inode, path,
280 : start, aligned_end, NULL,
281 : 1, 1, extent_item_size, &extent_inserted);
282 3851 : if (ret) {
283 0 : btrfs_abort_transaction(trans, root, ret);
284 0 : goto out;
285 : }
286 :
287 3851 : if (isize > actual_end)
288 0 : inline_len = min_t(u64, isize, actual_end);
289 3851 : ret = insert_inline_extent(trans, path, extent_inserted,
290 : root, inode, start,
291 : inline_len, compressed_size,
292 : compress_type, compressed_pages);
293 3851 : if (ret && ret != -ENOSPC) {
294 0 : btrfs_abort_transaction(trans, root, ret);
295 0 : goto out;
296 3851 : } else if (ret == -ENOSPC) {
297 : ret = 1;
298 : goto out;
299 : }
300 :
301 : set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
302 3851 : btrfs_delalloc_release_metadata(inode, end + 1 - start);
303 3851 : btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
304 : out:
305 3851 : btrfs_free_path(path);
306 3851 : btrfs_end_transaction(trans, root);
307 3851 : return ret;
308 : }
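/*
 * The eligibility test at the top of cow_file_range_inline(), restated as
 * a hypothetical boolean helper (a sketch, not part of this file): data is
 * only inlined when it starts at offset 0, fits inside one page, one leaf
 * item and the mount's max_inline limit, and covers the file through
 * i_size (so nothing past the inline bytes exists in the file).
 */
static inline bool inline_eligible_sketch(struct btrfs_root *root,
					  u64 start, u64 end, u64 isize,
					  u64 actual_end, u64 data_len,
					  size_t compressed_size)
{
	return start == 0 &&
	       actual_end < PAGE_CACHE_SIZE &&
	       data_len < BTRFS_MAX_INLINE_DATA_SIZE(root) &&
	       (compressed_size ||
		(actual_end & (root->sectorsize - 1)) != 0) &&
	       end + 1 >= isize &&
	       data_len <= root->fs_info->max_inline;
}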
309 :
310 : struct async_extent {
311 : u64 start;
312 : u64 ram_size;
313 : u64 compressed_size;
314 : struct page **pages;
315 : unsigned long nr_pages;
316 : int compress_type;
317 : struct list_head list;
318 : };
319 :
320 : struct async_cow {
321 : struct inode *inode;
322 : struct btrfs_root *root;
323 : struct page *locked_page;
324 : u64 start;
325 : u64 end;
326 : struct list_head extents;
327 : struct btrfs_work work;
328 : };
329 :
330 234 : static noinline int add_async_extent(struct async_cow *cow,
331 : u64 start, u64 ram_size,
332 : u64 compressed_size,
333 : struct page **pages,
334 : unsigned long nr_pages,
335 : int compress_type)
336 : {
337 : struct async_extent *async_extent;
338 :
339 : async_extent = kmalloc(sizeof(*async_extent), GFP_NOFS);
340 234 : BUG_ON(!async_extent); /* -ENOMEM */
341 234 : async_extent->start = start;
342 234 : async_extent->ram_size = ram_size;
343 234 : async_extent->compressed_size = compressed_size;
344 234 : async_extent->pages = pages;
345 234 : async_extent->nr_pages = nr_pages;
346 234 : async_extent->compress_type = compress_type;
347 234 : list_add_tail(&async_extent->list, &cow->extents);
348 234 : return 0;
349 : }
350 :
351 : /*
352 : * we create compressed extents in two phases. The first
353 : * phase compresses a range of pages that have already been
354 : * locked (both pages and state bits are locked).
355 : *
356 : * This is done inside an ordered work queue, and the compression
357 : * is spread across many cpus. The actual IO submission is step
358 : * two, and the ordered work queue takes care of making sure that
359 : * happens in the same order things were put onto the queue by
360 : * writepages and friends.
361 : *
362 : * If this code finds it can't get good compression, it puts an
363 : * entry onto the work queue to write the uncompressed bytes. This
364 : * makes sure that both compressed inodes and uncompressed inodes
365 : * are written in the same order that the flusher thread sent them
366 : * down.
367 : */
368 422 : static noinline int compress_file_range(struct inode *inode,
369 81 : struct page *locked_page,
370 : u64 start, u64 end,
371 : struct async_cow *async_cow,
372 : int *num_added)
373 : {
374 211 : struct btrfs_root *root = BTRFS_I(inode)->root;
375 : u64 num_bytes;
376 211 : u64 blocksize = root->sectorsize;
377 : u64 actual_end;
378 211 : u64 isize = i_size_read(inode);
379 : int ret = 0;
380 : struct page **pages = NULL;
381 : unsigned long nr_pages;
382 211 : unsigned long nr_pages_ret = 0;
383 211 : unsigned long total_compressed = 0;
384 211 : unsigned long total_in = 0;
385 : unsigned long max_compressed = 128 * 1024;
386 : unsigned long max_uncompressed = 128 * 1024;
387 : int i;
388 : int will_compress;
389 211 : int compress_type = root->fs_info->compress_type;
390 : int redirty = 0;
391 :
392 : /* if this is a small write inside eof, kick off a defrag */
393 211 : if ((end - start + 1) < 16 * 1024 &&
394 0 : (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
395 90 : btrfs_add_inode_defrag(NULL, inode);
396 :
397 : /*
398 : * skip compression for a small file range (<= blocksize) that
399 : * isn't an inline extent, since it doesn't save disk space at all.
400 : */
401 211 : if ((end - start + 1) <= blocksize &&
402 0 : (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
403 : goto cleanup_and_bail_uncompressed;
404 :
405 133 : actual_end = min_t(u64, isize, end + 1);
406 : again:
407 : will_compress = 0;
408 156 : nr_pages = (end >> PAGE_CACHE_SHIFT) - (start >> PAGE_CACHE_SHIFT) + 1;
409 156 : nr_pages = min(nr_pages, (128 * 1024UL) / PAGE_CACHE_SIZE);
410 :
411 : /*
412 : * we don't want to send crud past the end of i_size through
413 : * compression, that's just a waste of CPU time. So, if the
414 : * end of the file is before the start of our current
415 : * requested range of bytes, we bail out to the uncompressed
416 : * cleanup code that can deal with all of this.
417 : *
418 : * It isn't really the fastest way to fix things, but this is a
419 : * very uncommon corner.
420 : */
421 156 : if (actual_end <= start)
422 : goto cleanup_and_bail_uncompressed;
423 :
424 156 : total_compressed = actual_end - start;
425 :
426 : /* we want to make sure that the amount of ram required to uncompress
427 : * an extent is reasonable, so we limit the total size in ram
428 : * of a compressed extent to 128k. This is a crucial number
429 : * because it also controls how easily we can spread reads across
430 : * cpus for decompression.
431 : *
432 : * We also want to make sure the amount of IO required to do
433 : * a random read is reasonably small, so we limit the size of
434 : * a compressed extent to 128k.
435 : */
436 156 : total_compressed = min(total_compressed, max_uncompressed);
437 156 : num_bytes = ALIGN(end - start + 1, blocksize);
438 156 : num_bytes = max(blocksize, num_bytes);
439 156 : total_in = 0;
440 : ret = 0;
441 :
442 : /*
443 : * we do compression for mount -o compress and when the
444 : * inode has not been flagged as nocompress. This flag can
445 : * change at any time if we discover bad compression ratios.
446 : */
447 311 : if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) &&
448 183 : (btrfs_test_opt(root, COMPRESS) ||
449 28 : (BTRFS_I(inode)->force_compress) ||
450 0 : (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS))) {
451 : WARN_ON(pages);
452 155 : pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS);
453 155 : if (!pages) {
454 : /* just bail out to the uncompressed code */
455 : goto cont;
456 : }
457 :
458 155 : if (BTRFS_I(inode)->force_compress)
459 28 : compress_type = BTRFS_I(inode)->force_compress;
460 :
461 : /*
462 : * we need to call clear_page_dirty_for_io on each
463 : * page in the range. Otherwise applications with the file
464 : * mmap'd can wander in and change the page contents while
465 : * we are compressing them.
466 : *
467 : * If the compression fails for any reason, we set the pages
468 : * dirty again later on.
469 : */
470 155 : extent_range_clear_dirty_for_io(inode, start, end);
471 : redirty = 1;
472 155 : ret = btrfs_compress_pages(compress_type,
473 : inode->i_mapping, start,
474 : total_compressed, pages,
475 : nr_pages, &nr_pages_ret,
476 : &total_in,
477 : &total_compressed,
478 : max_compressed);
479 :
480 155 : if (!ret) {
481 155 : unsigned long offset = total_compressed &
482 : (PAGE_CACHE_SIZE - 1);
483 155 : struct page *page = pages[nr_pages_ret - 1];
484 : char *kaddr;
485 :
486 : /* zero the tail end of the last page, we might be
487 : * sending it down to disk
488 : */
489 155 : if (offset) {
490 : kaddr = kmap_atomic(page);
491 155 : memset(kaddr + offset, 0,
492 : PAGE_CACHE_SIZE - offset);
493 : kunmap_atomic(kaddr);
494 : }
495 : will_compress = 1;
496 : }
497 : }
498 : cont:
499 156 : if (start == 0) {
500 : /* let's try to make an inline extent */
501 6 : if (ret || total_in < (actual_end - start)) {
502 : /* we didn't compress the entire range, try
503 : * to make an uncompressed inline extent.
504 : */
505 4 : ret = cow_file_range_inline(root, inode, start, end,
506 : 0, 0, NULL);
507 : } else {
508 : /* try making a compressed inline extent */
509 2 : ret = cow_file_range_inline(root, inode, start, end,
510 : total_compressed,
511 : compress_type, pages);
512 : }
513 6 : if (ret <= 0) {
514 : unsigned long clear_flags = EXTENT_DELALLOC |
515 : EXTENT_DEFRAG;
516 0 : clear_flags |= (ret < 0) ? EXTENT_DO_ACCOUNTING : 0;
517 :
518 : /*
519 : * inline extent creation worked or returned an error,
520 : * we don't need to create any more async work items.
521 : * Unlock and free up our temp pages.
522 : */
523 0 : extent_clear_unlock_delalloc(inode, start, end, NULL,
524 : clear_flags, PAGE_UNLOCK |
525 : PAGE_CLEAR_DIRTY |
526 : PAGE_SET_WRITEBACK |
527 : PAGE_END_WRITEBACK);
528 : goto free_pages_out;
529 : }
530 : }
531 :
532 156 : if (will_compress) {
533 : /*
534 : * we aren't doing an inline extent, so round the compressed size
535 : * up to a block size boundary so that the allocator does sane
536 : * things
537 : */
538 155 : total_compressed = ALIGN(total_compressed, blocksize);
539 :
540 : /*
541 : * one last check to make sure the compression is really a
542 : * win, compare the page count read with the blocks on disk
543 : */
544 155 : total_in = ALIGN(total_in, PAGE_CACHE_SIZE);
545 155 : if (total_compressed >= total_in) {
546 : will_compress = 0;
547 : } else {
548 : num_bytes = total_in;
549 : }
550 : }
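	/*
	 * Worked example of the check above, assuming 4K pages and blocks:
	 * a 128K range that compresses to 13500 bytes gives
	 * total_compressed = 16K after ALIGN(), total_in = 128K, and
	 * 16K < 128K keeps the extent compressed. If compression only
	 * reached 126K, ALIGN() rounds it back up to 128K, the comparison
	 * becomes 128K >= 128K, and will_compress is cleared.
	 */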
551 156 : if (!will_compress && pages) {
552 : /*
553 : * the compression code ran but failed to make things smaller,
554 : * free any pages it allocated and our page pointer array
555 : */
556 2 : for (i = 0; i < nr_pages_ret; i++) {
557 2 : WARN_ON(pages[i]->mapping);
558 2 : page_cache_release(pages[i]);
559 : }
560 2 : kfree(pages);
561 : pages = NULL;
562 2 : total_compressed = 0;
563 2 : nr_pages_ret = 0;
564 :
565 : /* flag the file so we don't compress in the future */
566 4 : if (!btrfs_test_opt(root, FORCE_COMPRESS) &&
567 2 : !(BTRFS_I(inode)->force_compress)) {
568 2 : BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
569 : }
570 : }
571 156 : if (will_compress) {
572 153 : *num_added += 1;
573 :
574 : /* the async work queues will take care of doing actual
575 : * allocation on disk for these compressed pages,
576 : * and will submit them to the elevator.
577 : */
578 153 : add_async_extent(async_cow, start, num_bytes,
579 : total_compressed, pages, nr_pages_ret,
580 : compress_type);
581 :
582 153 : if (start + num_bytes < end) {
583 : start += num_bytes;
584 : pages = NULL;
585 23 : cond_resched();
586 23 : goto again;
587 : }
588 : } else {
589 : cleanup_and_bail_uncompressed:
590 : /*
591 : * No compression, but we still need to write the pages in
592 : * the file we've been given so far. Redirty the locked
593 : * page if it corresponds to our extent and set things up
594 : * for the async work queue to run cow_file_range to do
595 : * the normal delalloc dance
596 : */
597 81 : if (page_offset(locked_page) >= start &&
598 : page_offset(locked_page) <= end) {
599 79 : __set_page_dirty_nobuffers(locked_page);
600 : /* unlocked later on in the async handlers */
601 : }
602 81 : if (redirty)
603 2 : extent_range_redirty_for_io(inode, start, end);
604 81 : add_async_extent(async_cow, start, end - start + 1,
605 : 0, NULL, 0, BTRFS_COMPRESS_NONE);
606 81 : *num_added += 1;
607 : }
608 :
609 : out:
610 211 : return ret;
611 :
612 : free_pages_out:
613 0 : for (i = 0; i < nr_pages_ret; i++) {
614 0 : WARN_ON(pages[i]->mapping);
615 0 : page_cache_release(pages[i]);
616 : }
617 0 : kfree(pages);
618 :
619 0 : goto out;
620 : }
621 :
622 : /*
623 : * phase two of compressed writeback. This is the ordered portion
624 : * of the code, which only gets called in the order the work was
625 : * queued. We walk all the async extents created by compress_file_range
626 : * and send them down to the disk.
627 : */
628 211 : static noinline int submit_compressed_extents(struct inode *inode,
629 : struct async_cow *async_cow)
630 : {
631 : struct async_extent *async_extent;
632 : u64 alloc_hint = 0;
633 : struct btrfs_key ins;
634 : struct extent_map *em;
635 211 : struct btrfs_root *root = BTRFS_I(inode)->root;
636 211 : struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
637 : struct extent_io_tree *io_tree;
638 : int ret = 0;
639 :
640 422 : if (list_empty(&async_cow->extents))
641 : return 0;
642 :
643 : again:
644 445 : while (!list_empty(&async_cow->extents)) {
645 234 : async_extent = list_entry(async_cow->extents.next,
646 : struct async_extent, list);
647 234 : list_del(&async_extent->list);
648 :
649 234 : io_tree = &BTRFS_I(inode)->io_tree;
650 :
651 : retry:
652 : /* did the compression code fall back to uncompressed IO? */
653 234 : if (!async_extent->pages) {
654 81 : int page_started = 0;
655 81 : unsigned long nr_written = 0;
656 :
657 81 : lock_extent(io_tree, async_extent->start,
658 162 : async_extent->start +
659 81 : async_extent->ram_size - 1);
660 :
661 : /* allocate blocks */
662 81 : ret = cow_file_range(inode, async_cow->locked_page,
663 : async_extent->start,
664 162 : async_extent->start +
665 81 : async_extent->ram_size - 1,
666 : &page_started, &nr_written, 0);
667 :
668 : /* JDM XXX */
669 :
670 : /*
671 : * if page_started, cow_file_range inserted an
672 : * inline extent and took care of all the unlocking
673 : * and IO for us. Otherwise, we need to submit
674 : * all those pages down to the drive.
675 : */
676 81 : if (!page_started && !ret)
677 81 : extent_write_locked_range(io_tree,
678 : inode, async_extent->start,
679 162 : async_extent->start +
680 81 : async_extent->ram_size - 1,
681 : btrfs_get_extent,
682 : WB_SYNC_ALL);
683 0 : else if (ret)
684 0 : unlock_page(async_cow->locked_page);
685 81 : kfree(async_extent);
686 81 : cond_resched();
687 81 : continue;
688 : }
689 :
690 153 : lock_extent(io_tree, async_extent->start,
691 153 : async_extent->start + async_extent->ram_size - 1);
692 :
693 153 : ret = btrfs_reserve_extent(root,
694 : async_extent->compressed_size,
695 : async_extent->compressed_size,
696 : 0, alloc_hint, &ins, 1, 1);
697 153 : if (ret) {
698 : int i;
699 :
700 0 : for (i = 0; i < async_extent->nr_pages; i++) {
701 0 : WARN_ON(async_extent->pages[i]->mapping);
702 0 : page_cache_release(async_extent->pages[i]);
703 : }
704 0 : kfree(async_extent->pages);
705 0 : async_extent->nr_pages = 0;
706 0 : async_extent->pages = NULL;
707 :
708 0 : if (ret == -ENOSPC) {
709 0 : unlock_extent(io_tree, async_extent->start,
710 0 : async_extent->start +
711 0 : async_extent->ram_size - 1);
712 :
713 : /*
714 : * we need to redirty the pages if we decide to
715 : * fall back to uncompressed IO, otherwise we
716 : * will not submit these pages down to lower
717 : * layers.
718 : */
719 0 : extent_range_redirty_for_io(inode,
720 : async_extent->start,
721 0 : async_extent->start +
722 0 : async_extent->ram_size - 1);
723 :
724 0 : goto retry;
725 : }
726 : goto out_free;
727 : }
728 :
729 : /*
730 : * here we're doing allocation and writeback of the
731 : * compressed pages
732 : */
733 153 : btrfs_drop_extent_cache(inode, async_extent->start,
734 306 : async_extent->start +
735 153 : async_extent->ram_size - 1, 0);
736 :
737 153 : em = alloc_extent_map();
738 153 : if (!em) {
739 : ret = -ENOMEM;
740 : goto out_free_reserve;
741 : }
742 153 : em->start = async_extent->start;
743 153 : em->len = async_extent->ram_size;
744 153 : em->orig_start = em->start;
745 153 : em->mod_start = em->start;
746 153 : em->mod_len = em->len;
747 :
748 153 : em->block_start = ins.objectid;
749 153 : em->block_len = ins.offset;
750 153 : em->orig_block_len = ins.offset;
751 153 : em->ram_bytes = async_extent->ram_size;
752 153 : em->bdev = root->fs_info->fs_devices->latest_bdev;
753 153 : em->compress_type = async_extent->compress_type;
754 : set_bit(EXTENT_FLAG_PINNED, &em->flags);
755 : set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
756 153 : em->generation = -1;
757 :
758 : while (1) {
759 153 : write_lock(&em_tree->lock);
760 153 : ret = add_extent_mapping(em_tree, em, 1);
761 : write_unlock(&em_tree->lock);
762 153 : if (ret != -EEXIST) {
763 153 : free_extent_map(em);
764 : break;
765 : }
766 0 : btrfs_drop_extent_cache(inode, async_extent->start,
767 0 : async_extent->start +
768 0 : async_extent->ram_size - 1, 0);
769 0 : }
770 :
771 153 : if (ret)
772 : goto out_free_reserve;
773 :
774 153 : ret = btrfs_add_ordered_extent_compress(inode,
775 : async_extent->start,
776 : ins.objectid,
777 : async_extent->ram_size,
778 : ins.offset,
779 : BTRFS_ORDERED_COMPRESSED,
780 : async_extent->compress_type);
781 153 : if (ret) {
782 0 : btrfs_drop_extent_cache(inode, async_extent->start,
783 0 : async_extent->start +
784 0 : async_extent->ram_size - 1, 0);
785 0 : goto out_free_reserve;
786 : }
787 :
788 : /*
789 : * clear dirty, set writeback and unlock the pages.
790 : */
791 153 : extent_clear_unlock_delalloc(inode, async_extent->start,
792 306 : async_extent->start +
793 153 : async_extent->ram_size - 1,
794 : NULL, EXTENT_LOCKED | EXTENT_DELALLOC,
795 : PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
796 : PAGE_SET_WRITEBACK);
797 459 : ret = btrfs_submit_compressed_write(inode,
798 : async_extent->start,
799 153 : async_extent->ram_size,
800 : ins.objectid,
801 153 : ins.offset, async_extent->pages,
802 : async_extent->nr_pages);
803 153 : alloc_hint = ins.objectid + ins.offset;
804 153 : kfree(async_extent);
805 153 : if (ret)
806 : goto out;
807 153 : cond_resched();
808 : }
809 : ret = 0;
810 : out:
811 211 : return ret;
812 : out_free_reserve:
813 0 : btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
814 : out_free:
815 0 : extent_clear_unlock_delalloc(inode, async_extent->start,
816 0 : async_extent->start +
817 0 : async_extent->ram_size - 1,
818 : NULL, EXTENT_LOCKED | EXTENT_DELALLOC |
819 : EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING,
820 : PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
821 : PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK);
822 0 : kfree(async_extent);
823 0 : goto again;
824 : }
825 :
826 45642 : static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
827 : u64 num_bytes)
828 : {
829 45642 : struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
830 : struct extent_map *em;
831 : u64 alloc_hint = 0;
832 :
833 45642 : read_lock(&em_tree->lock);
834 45645 : em = search_extent_mapping(em_tree, start, num_bytes);
835 45641 : if (em) {
836 : /*
837 : * if block start isn't an actual block number then find the
838 : * first block in this inode and use that as a hint. If that
839 : * block is also bogus then just don't worry about it.
840 : */
841 38468 : if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
842 30273 : free_extent_map(em);
843 30270 : em = search_extent_mapping(em_tree, 0, 0);
844 30272 : if (em && em->block_start < EXTENT_MAP_LAST_BYTE)
845 : alloc_hint = em->block_start;
846 30272 : if (em)
847 30272 : free_extent_map(em);
848 : } else {
849 : alloc_hint = em->block_start;
850 8195 : free_extent_map(em);
851 : }
852 : }
853 : read_unlock(&em_tree->lock);
854 :
855 45645 : return alloc_hint;
856 : }
857 :
858 : /*
859 : * when extent_io.c finds a delayed allocation range in the file,
860 : * the callbacks end up in this code. The basic idea is to
861 : * allocate extents on disk for the range, and create ordered data structs
862 : * in ram to track those extents.
863 : *
864 : * locked_page is the page that writepage had locked already. We use
865 : * it to make sure we don't do extra locks or unlocks.
866 : *
867 : * *page_started is set to one if we unlock locked_page and do everything
868 : * required to start IO on it. It may be clean and already done with
869 : * IO when we return.
870 : */
871 24236 : static noinline int cow_file_range(struct inode *inode,
872 : struct page *locked_page,
873 : u64 start, u64 end, int *page_started,
874 : unsigned long *nr_written,
875 : int unlock)
876 : {
877 24236 : struct btrfs_root *root = BTRFS_I(inode)->root;
878 : u64 alloc_hint = 0;
879 : u64 num_bytes;
880 : unsigned long ram_size;
881 : u64 disk_num_bytes;
882 : u64 cur_alloc_size;
883 24236 : u64 blocksize = root->sectorsize;
884 : struct btrfs_key ins;
885 : struct extent_map *em;
886 24236 : struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
887 : int ret = 0;
888 :
889 24236 : if (btrfs_is_free_space_inode(inode)) {
890 0 : WARN_ON_ONCE(1);
891 : ret = -EINVAL;
892 : goto out_unlock;
893 : }
894 :
895 24236 : num_bytes = ALIGN(end - start + 1, blocksize);
896 24236 : num_bytes = max(blocksize, num_bytes);
897 : disk_num_bytes = num_bytes;
898 :
899 : /* if this is a small write inside eof, kick off defrag */
900 24236 : if (num_bytes < 64 * 1024 &&
901 7774 : (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
902 11818 : btrfs_add_inode_defrag(NULL, inode);
903 :
904 24235 : if (start == 0) {
906 : /* let's try to make an inline extent */
906 11208 : ret = cow_file_range_inline(root, inode, start, end, 0, 0,
907 : NULL);
908 11208 : if (ret == 0) {
909 3851 : extent_clear_unlock_delalloc(inode, start, end, NULL,
910 : EXTENT_LOCKED | EXTENT_DELALLOC |
911 : EXTENT_DEFRAG, PAGE_UNLOCK |
912 : PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
913 : PAGE_END_WRITEBACK);
914 :
915 7702 : *nr_written = *nr_written +
916 3851 : (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE;
917 3851 : *page_started = 1;
918 3851 : goto out;
919 7357 : } else if (ret < 0) {
920 : goto out_unlock;
921 : }
922 : }
923 :
924 40768 : BUG_ON(disk_num_bytes >
925 : btrfs_super_total_bytes(root->fs_info->super_copy));
926 :
927 20384 : alloc_hint = get_extent_allocation_hint(inode, start, num_bytes);
928 20385 : btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);
929 :
930 61153 : while (disk_num_bytes > 0) {
931 : unsigned long op;
932 :
933 : cur_alloc_size = disk_num_bytes;
934 20384 : ret = btrfs_reserve_extent(root, cur_alloc_size,
935 20384 : root->sectorsize, 0, alloc_hint,
936 : &ins, 1, 1);
937 20385 : if (ret < 0)
938 : goto out_unlock;
939 :
940 20385 : em = alloc_extent_map();
941 20384 : if (!em) {
942 : ret = -ENOMEM;
943 : goto out_reserve;
944 : }
945 20384 : em->start = start;
946 20384 : em->orig_start = em->start;
947 20384 : ram_size = ins.offset;
948 20384 : em->len = ins.offset;
949 20384 : em->mod_start = em->start;
950 20384 : em->mod_len = em->len;
951 :
952 20384 : em->block_start = ins.objectid;
953 20384 : em->block_len = ins.offset;
954 20384 : em->orig_block_len = ins.offset;
955 20384 : em->ram_bytes = ram_size;
956 20384 : em->bdev = root->fs_info->fs_devices->latest_bdev;
957 : set_bit(EXTENT_FLAG_PINNED, &em->flags);
958 20384 : em->generation = -1;
959 :
960 : while (1) {
961 20384 : write_lock(&em_tree->lock);
962 20385 : ret = add_extent_mapping(em_tree, em, 1);
963 : write_unlock(&em_tree->lock);
964 20382 : if (ret != -EEXIST) {
965 20382 : free_extent_map(em);
966 : break;
967 : }
968 0 : btrfs_drop_extent_cache(inode, start,
969 0 : start + ram_size - 1, 0);
970 0 : }
971 20385 : if (ret)
972 : goto out_reserve;
973 :
974 20385 : cur_alloc_size = ins.offset;
975 20385 : ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
976 : ram_size, cur_alloc_size, 0);
977 20385 : if (ret)
978 : goto out_drop_extent_cache;
979 :
980 20385 : if (root->root_key.objectid ==
981 : BTRFS_DATA_RELOC_TREE_OBJECTID) {
982 0 : ret = btrfs_reloc_clone_csums(inode, start,
983 : cur_alloc_size);
984 0 : if (ret)
985 : goto out_drop_extent_cache;
986 : }
987 :
988 20385 : if (disk_num_bytes < cur_alloc_size)
989 : break;
990 :
991 : /* we're not doing compressed IO, don't unlock the first
992 : * page (which the caller expects to stay locked), don't
993 : * clear any dirty bits and don't set any writeback bits
994 : *
995 : * Do set the Private2 bit so we know this page was properly
996 : * setup for writepage
997 : */
998 20385 : op = unlock ? PAGE_UNLOCK : 0;
999 20385 : op |= PAGE_SET_PRIVATE2;
1000 :
1001 20385 : extent_clear_unlock_delalloc(inode, start,
1002 20385 : start + ram_size - 1, locked_page,
1003 : EXTENT_LOCKED | EXTENT_DELALLOC,
1004 : op);
1005 20385 : disk_num_bytes -= cur_alloc_size;
1006 : num_bytes -= cur_alloc_size;
1007 20385 : alloc_hint = ins.objectid + ins.offset;
1008 20385 : start += cur_alloc_size;
1009 : }
1010 : out:
1011 24236 : return ret;
1012 :
1013 : out_drop_extent_cache:
1014 0 : btrfs_drop_extent_cache(inode, start, start + ram_size - 1, 0);
1015 : out_reserve:
1016 0 : btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
1017 : out_unlock:
1018 0 : extent_clear_unlock_delalloc(inode, start, end, locked_page,
1019 : EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
1020 : EXTENT_DELALLOC | EXTENT_DEFRAG,
1021 : PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
1022 : PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK);
1023 0 : goto out;
1024 : }
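/*
 * A worked example of the allocation loop above (one possible run, the
 * numbers are made up): for a 1M delalloc range, btrfs_reserve_extent()
 * may only find a contiguous 384K extent on the first pass. That chunk
 * gets an extent map and an ordered extent, its pages are unlocked for
 * writeback, and the loop retries with start advanced by 384K and
 * disk_num_bytes reduced accordingly, until the whole range is covered.
 */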
1025 :
1026 : /*
1027 : * work queue call back to started compression on a file and pages
1028 : */
1029 211 : static noinline void async_cow_start(struct btrfs_work *work)
1030 : {
1031 : struct async_cow *async_cow;
1032 211 : int num_added = 0;
1033 211 : async_cow = container_of(work, struct async_cow, work);
1034 :
1035 211 : compress_file_range(async_cow->inode, async_cow->locked_page,
1036 : async_cow->start, async_cow->end, async_cow,
1037 : &num_added);
1038 211 : if (num_added == 0) {
1039 0 : btrfs_add_delayed_iput(async_cow->inode);
1040 0 : async_cow->inode = NULL;
1041 : }
1042 211 : }
1043 :
1044 : /*
1045 : * work queue callback to submit previously compressed pages
1046 : */
1047 211 : static noinline void async_cow_submit(struct btrfs_work *work)
1048 : {
1049 : struct async_cow *async_cow;
1050 : struct btrfs_root *root;
1051 : unsigned long nr_pages;
1052 :
1053 211 : async_cow = container_of(work, struct async_cow, work);
1054 :
1055 211 : root = async_cow->root;
1056 211 : nr_pages = (async_cow->end - async_cow->start + PAGE_CACHE_SIZE) >>
1057 : PAGE_CACHE_SHIFT;
1058 :
1059 422 : if (atomic_sub_return(nr_pages, &root->fs_info->async_delalloc_pages) <
1060 211 : 5 * 1024 * 1024 &&
1061 211 : waitqueue_active(&root->fs_info->async_submit_wait))
1062 0 : wake_up(&root->fs_info->async_submit_wait);
1063 :
1064 211 : if (async_cow->inode)
1065 211 : submit_compressed_extents(async_cow->inode, async_cow);
1066 211 : }
1067 :
1068 211 : static noinline void async_cow_free(struct btrfs_work *work)
1069 : {
1070 : struct async_cow *async_cow;
1071 211 : async_cow = container_of(work, struct async_cow, work);
1072 211 : if (async_cow->inode)
1073 211 : btrfs_add_delayed_iput(async_cow->inode);
1074 211 : kfree(async_cow);
1075 211 : }
1076 :
1077 208 : static int cow_file_range_async(struct inode *inode, struct page *locked_page,
1078 : u64 start, u64 end, int *page_started,
1079 : unsigned long *nr_written)
1080 : {
1081 : struct async_cow *async_cow;
1082 208 : struct btrfs_root *root = BTRFS_I(inode)->root;
1083 : unsigned long nr_pages;
1084 : u64 cur_end;
1085 : int limit = 10 * 1024 * 1024;
1086 :
1087 208 : clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED,
1088 : 1, 0, NULL, GFP_NOFS);
1089 627 : while (start < end) {
1090 : async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
1091 211 : BUG_ON(!async_cow); /* -ENOMEM */
1092 211 : async_cow->inode = igrab(inode);
1093 211 : async_cow->root = root;
1094 211 : async_cow->locked_page = locked_page;
1095 211 : async_cow->start = start;
1096 :
1097 211 : if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS)
1098 : cur_end = end;
1099 : else
1100 208 : cur_end = min(end, start + 512 * 1024 - 1);
1101 :
1102 211 : async_cow->end = cur_end;
1103 211 : INIT_LIST_HEAD(&async_cow->extents);
1104 :
1105 211 : btrfs_init_work(&async_cow->work,
1106 : btrfs_delalloc_helper,
1107 : async_cow_start, async_cow_submit,
1108 : async_cow_free);
1109 :
1110 211 : nr_pages = (cur_end - start + PAGE_CACHE_SIZE) >>
1111 : PAGE_CACHE_SHIFT;
1112 211 : atomic_add(nr_pages, &root->fs_info->async_delalloc_pages);
1113 :
1114 211 : btrfs_queue_work(root->fs_info->delalloc_workers,
1115 : &async_cow->work);
1116 :
1117 422 : if (atomic_read(&root->fs_info->async_delalloc_pages) > limit) {
1118 0 : wait_event(root->fs_info->async_submit_wait,
1119 : (atomic_read(&root->fs_info->async_delalloc_pages) <
1120 : limit));
1121 : }
1122 :
1123 422 : while (atomic_read(&root->fs_info->async_submit_draining) &&
1124 : atomic_read(&root->fs_info->async_delalloc_pages)) {
1125 0 : wait_event(root->fs_info->async_submit_wait,
1126 : (atomic_read(&root->fs_info->async_delalloc_pages) ==
1127 : 0));
1128 : }
1129 :
1130 211 : *nr_written += nr_pages;
1131 211 : start = cur_end + 1;
1132 : }
1133 208 : *page_started = 1;
1134 208 : return 0;
1135 : }
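/*
 * Example of the chunking above: a 1152K dirty range on a compressible
 * inode becomes three async_cow units of 512K, 512K and 128K, each queued
 * to the delalloc workers; *nr_written is bumped for all of those pages
 * even though the actual IO completes asynchronously later.
 */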
1136 :
1137 5707 : static noinline int csum_exist_in_range(struct btrfs_root *root,
1138 : u64 bytenr, u64 num_bytes)
1139 : {
1140 : int ret;
1141 : struct btrfs_ordered_sum *sums;
1142 5707 : LIST_HEAD(list);
1143 :
1144 5707 : ret = btrfs_lookup_csums_range(root->fs_info->csum_root, bytenr,
1145 5707 : bytenr + num_bytes - 1, &list, 0);
1146 11414 : if (ret == 0 && list_empty(&list))
1147 : return 0;
1148 :
1149 0 : while (!list_empty(&list)) {
1150 0 : sums = list_entry(list.next, struct btrfs_ordered_sum, list);
1151 0 : list_del(&sums->list);
1152 0 : kfree(sums);
1153 : }
1154 : return 1;
1155 : }
1156 :
1157 : /*
1158 : * when the nocow writeback callback runs. This checks for snapshots or COW copies
1159 : * of the extents that exist in the file, and COWs the file as required.
1160 : *
1161 : * If no cow copies or snapshots exist, we write directly to the existing
1162 : * blocks on disk
1163 : */
1164 13707 : static noinline int run_delalloc_nocow(struct inode *inode,
1165 : struct page *locked_page,
1166 : u64 start, u64 end, int *page_started, int force,
1167 : unsigned long *nr_written)
1168 : {
1169 19415 : struct btrfs_root *root = BTRFS_I(inode)->root;
1170 : struct btrfs_trans_handle *trans;
1171 14214 : struct extent_buffer *leaf;
1172 : struct btrfs_path *path;
1173 : struct btrfs_file_extent_item *fi;
1174 : struct btrfs_key found_key;
1175 : u64 cow_start;
1176 : u64 cur_offset;
1177 : u64 extent_end;
1178 : u64 extent_offset;
1179 : u64 disk_bytenr;
1180 : u64 num_bytes;
1181 : u64 disk_num_bytes;
1182 : u64 ram_bytes;
1183 : int extent_type;
1184 : int ret, err;
1185 : int type;
1186 : int nocow;
1187 : int check_prev = 1;
1188 : bool nolock;
1189 : u64 ino = btrfs_ino(inode);
1190 :
1191 13707 : path = btrfs_alloc_path();
1192 13707 : if (!path) {
1193 0 : extent_clear_unlock_delalloc(inode, start, end, locked_page,
1194 : EXTENT_LOCKED | EXTENT_DELALLOC |
1195 : EXTENT_DO_ACCOUNTING |
1196 : EXTENT_DEFRAG, PAGE_UNLOCK |
1197 : PAGE_CLEAR_DIRTY |
1198 : PAGE_SET_WRITEBACK |
1199 : PAGE_END_WRITEBACK);
1200 0 : return -ENOMEM;
1201 : }
1202 :
1203 13707 : nolock = btrfs_is_free_space_inode(inode);
1204 :
1205 13707 : if (nolock)
1206 4057 : trans = btrfs_join_transaction_nolock(root);
1207 : else
1208 9650 : trans = btrfs_join_transaction(root);
1209 :
1210 13707 : if (IS_ERR(trans)) {
1211 0 : extent_clear_unlock_delalloc(inode, start, end, locked_page,
1212 : EXTENT_LOCKED | EXTENT_DELALLOC |
1213 : EXTENT_DO_ACCOUNTING |
1214 : EXTENT_DEFRAG, PAGE_UNLOCK |
1215 : PAGE_CLEAR_DIRTY |
1216 : PAGE_SET_WRITEBACK |
1217 : PAGE_END_WRITEBACK);
1218 0 : btrfs_free_path(path);
1219 0 : return PTR_ERR(trans);
1220 : }
1221 :
1222 13707 : trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1223 :
1224 : cow_start = (u64)-1;
1225 : cur_offset = start;
1226 : while (1) {
1227 13749 : ret = btrfs_lookup_file_extent(trans, root, path, ino,
1228 : cur_offset, 0);
1229 13749 : if (ret < 0)
1230 : goto error;
1231 13749 : if (ret > 0 && path->slots[0] > 0 && check_prev) {
1232 2639 : leaf = path->nodes[0];
1233 2639 : btrfs_item_key_to_cpu(leaf, &found_key,
1234 : path->slots[0] - 1);
1235 5278 : if (found_key.objectid == ino &&
1236 2639 : found_key.type == BTRFS_EXTENT_DATA_KEY)
1237 2639 : path->slots[0]--;
1238 : }
1239 : check_prev = 0;
1240 : next_slot:
1241 14214 : leaf = path->nodes[0];
1242 28428 : if (path->slots[0] >= btrfs_header_nritems(leaf)) {
1243 4 : ret = btrfs_next_leaf(root, path);
1244 4 : if (ret < 0)
1245 : goto error;
1246 4 : if (ret > 0)
1247 : break;
1248 4 : leaf = path->nodes[0];
1249 : }
1250 :
1251 : nocow = 0;
1252 : disk_bytenr = 0;
1253 : num_bytes = 0;
1254 14214 : btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1255 :
1256 28414 : if (found_key.objectid > ino ||
1257 28400 : found_key.type > BTRFS_EXTENT_DATA_KEY ||
1258 14200 : found_key.offset > end)
1259 : break;
1260 :
1261 14199 : if (found_key.offset > cur_offset) {
1262 : extent_end = found_key.offset;
1263 : extent_type = 0;
1264 : goto out_check;
1265 : }
1266 :
1267 28398 : fi = btrfs_item_ptr(leaf, path->slots[0],
1268 : struct btrfs_file_extent_item);
1269 14199 : extent_type = btrfs_file_extent_type(leaf, fi);
1270 :
1271 : ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
1272 14199 : if (extent_type == BTRFS_FILE_EXTENT_REG ||
1273 : extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1274 : disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1275 : extent_offset = btrfs_file_extent_offset(leaf, fi);
1276 28397 : extent_end = found_key.offset +
1277 : btrfs_file_extent_num_bytes(leaf, fi);
1278 : disk_num_bytes =
1279 : btrfs_file_extent_disk_num_bytes(leaf, fi);
1280 14199 : if (extent_end <= start) {
1281 0 : path->slots[0]++;
1282 0 : goto next_slot;
1283 : }
1284 14199 : if (disk_bytenr == 0)
1285 : goto out_check;
1286 26239 : if (btrfs_file_extent_compression(leaf, fi) ||
1287 13114 : btrfs_file_extent_encryption(leaf, fi) ||
1288 : btrfs_file_extent_other_encoding(leaf, fi))
1289 : goto out_check;
1290 13114 : if (extent_type == BTRFS_FILE_EXTENT_REG && !force)
1291 : goto out_check;
1292 6754 : if (btrfs_extent_readonly(root, disk_bytenr))
1293 : goto out_check;
1294 6754 : if (btrfs_cross_ref_exist(trans, root, ino,
1295 6754 : found_key.offset -
1296 : extent_offset, disk_bytenr))
1297 : goto out_check;
1298 6654 : disk_bytenr += extent_offset;
1299 6654 : disk_bytenr += cur_offset - found_key.offset;
1300 6654 : num_bytes = min(end + 1, extent_end) - cur_offset;
1301 : /*
1302 : * if there are pending snapshots for this root,
1303 : * we fall back to the common COW path.
1304 : */
1305 6654 : if (!nolock) {
1306 2597 : err = btrfs_start_nocow_write(root);
1307 2597 : if (!err)
1308 : goto out_check;
1309 : }
1310 : /*
1311 : * force COW if a csum exists in the range.
1312 : * this ensures that csums for a given extent are
1313 : * either valid or do not exist.
1314 : */
1315 5708 : if (csum_exist_in_range(root, disk_bytenr, num_bytes))
1316 : goto out_check;
1317 : nocow = 1;
1318 0 : } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1319 0 : extent_end = found_key.offset +
1320 0 : btrfs_file_extent_inline_len(leaf,
1321 : path->slots[0], fi);
1322 0 : extent_end = ALIGN(extent_end, root->sectorsize);
1323 : } else {
1324 0 : BUG_ON(1);
1325 : }
1326 : out_check:
1327 14198 : if (extent_end <= start) {
1328 0 : path->slots[0]++;
1329 0 : if (!nolock && nocow)
1330 0 : btrfs_end_nocow_write(root);
1331 : goto next_slot;
1332 : }
1333 14198 : if (!nocow) {
1334 8491 : if (cow_start == (u64)-1)
1335 : cow_start = cur_offset;
1336 : cur_offset = extent_end;
1337 8491 : if (cur_offset > end)
1338 : break;
1339 465 : path->slots[0]++;
1340 465 : goto next_slot;
1341 : }
1342 :
1343 5707 : btrfs_release_path(path);
1344 5707 : if (cow_start != (u64)-1) {
1345 147 : ret = cow_file_range(inode, locked_page,
1346 147 : cow_start, found_key.offset - 1,
1347 : page_started, nr_written, 1);
1348 147 : if (ret) {
1349 0 : if (!nolock && nocow)
1350 0 : btrfs_end_nocow_write(root);
1351 : goto error;
1352 : }
1353 : cow_start = (u64)-1;
1354 : }
1355 :
1356 5707 : if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1357 : struct extent_map *em;
1358 : struct extent_map_tree *em_tree;
1359 5625 : em_tree = &BTRFS_I(inode)->extent_tree;
1360 5625 : em = alloc_extent_map();
1361 5625 : BUG_ON(!em); /* -ENOMEM */
1362 5625 : em->start = cur_offset;
1363 5625 : em->orig_start = found_key.offset - extent_offset;
1364 5625 : em->len = num_bytes;
1365 5625 : em->block_len = num_bytes;
1366 5625 : em->block_start = disk_bytenr;
1367 5625 : em->orig_block_len = disk_num_bytes;
1368 5625 : em->ram_bytes = ram_bytes;
1369 5625 : em->bdev = root->fs_info->fs_devices->latest_bdev;
1370 5625 : em->mod_start = em->start;
1371 5625 : em->mod_len = em->len;
1372 : set_bit(EXTENT_FLAG_PINNED, &em->flags);
1373 : set_bit(EXTENT_FLAG_FILLING, &em->flags);
1374 5625 : em->generation = -1;
1375 : while (1) {
1376 11250 : write_lock(&em_tree->lock);
1377 11250 : ret = add_extent_mapping(em_tree, em, 1);
1378 : write_unlock(&em_tree->lock);
1379 11250 : if (ret != -EEXIST) {
1380 5625 : free_extent_map(em);
1381 : break;
1382 : }
1383 5625 : btrfs_drop_extent_cache(inode, em->start,
1384 5625 : em->start + em->len - 1, 0);
1385 5625 : }
1386 : type = BTRFS_ORDERED_PREALLOC;
1387 : } else {
1388 : type = BTRFS_ORDERED_NOCOW;
1389 : }
1390 :
1391 5707 : ret = btrfs_add_ordered_extent(inode, cur_offset, disk_bytenr,
1392 : num_bytes, num_bytes, type);
1393 5707 : BUG_ON(ret); /* -ENOMEM */
1394 :
1395 5707 : if (root->root_key.objectid ==
1396 : BTRFS_DATA_RELOC_TREE_OBJECTID) {
1397 1021 : ret = btrfs_reloc_clone_csums(inode, cur_offset,
1398 : num_bytes);
1399 1021 : if (ret) {
1400 0 : if (!nolock && nocow)
1401 0 : btrfs_end_nocow_write(root);
1402 : goto error;
1403 : }
1404 : }
1405 :
1406 5707 : extent_clear_unlock_delalloc(inode, cur_offset,
1407 5707 : cur_offset + num_bytes - 1,
1408 : locked_page, EXTENT_LOCKED |
1409 : EXTENT_DELALLOC, PAGE_UNLOCK |
1410 : PAGE_SET_PRIVATE2);
1411 5707 : if (!nolock && nocow)
1412 1650 : btrfs_end_nocow_write(root);
1413 : cur_offset = extent_end;
1414 5707 : if (cur_offset > end)
1415 : break;
1416 : }
1417 13706 : btrfs_release_path(path);
1418 :
1419 13707 : if (cur_offset <= end && cow_start == (u64)-1) {
1420 : cow_start = cur_offset;
1421 : cur_offset = end;
1422 : }
1423 :
1424 13707 : if (cow_start != (u64)-1) {
1425 8042 : ret = cow_file_range(inode, locked_page, cow_start, end,
1426 : page_started, nr_written, 1);
1427 : if (ret)
1428 : goto error;
1429 : }
1430 :
1431 : error:
1432 13707 : err = btrfs_end_transaction(trans, root);
1433 13707 : if (!ret)
1434 : ret = err;
1435 :
1436 13707 : if (ret && cur_offset < end)
1437 0 : extent_clear_unlock_delalloc(inode, cur_offset, end,
1438 : locked_page, EXTENT_LOCKED |
1439 : EXTENT_DELALLOC | EXTENT_DEFRAG |
1440 : EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
1441 : PAGE_CLEAR_DIRTY |
1442 : PAGE_SET_WRITEBACK |
1443 : PAGE_END_WRITEBACK);
1444 13707 : btrfs_free_path(path);
1445 13707 : return ret;
1446 : }
1447 :
1448 : /*
1449 : * extent_io.c callback to do delayed allocation processing
1450 : */
1451 29881 : static int run_delalloc_range(struct inode *inode, struct page *locked_page,
1452 : u64 start, u64 end, int *page_started,
1453 : unsigned long *nr_written)
1454 : {
1455 : int ret;
1456 29881 : struct btrfs_root *root = BTRFS_I(inode)->root;
1457 :
1458 29881 : if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) {
1459 4057 : ret = run_delalloc_nocow(inode, locked_page, start, end,
1460 : page_started, 1, nr_written);
1461 25824 : } else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC) {
1462 9650 : ret = run_delalloc_nocow(inode, locked_page, start, end,
1463 : page_started, 0, nr_written);
1464 32144 : } else if (!btrfs_test_opt(root, COMPRESS) &&
1465 31936 : !(BTRFS_I(inode)->force_compress) &&
1466 15966 : !(BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS)) {
1467 15966 : ret = cow_file_range(inode, locked_page, start, end,
1468 : page_started, nr_written, 1);
1469 : } else {
1470 : set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
1471 : &BTRFS_I(inode)->runtime_flags);
1472 208 : ret = cow_file_range_async(inode, locked_page, start, end,
1473 : page_started, nr_written);
1474 : }
1475 29881 : return ret;
1476 : }
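/*
 * Informal summary of the dispatch above:
 *
 *	NODATACOW inode           -> run_delalloc_nocow(), force = 1
 *	PREALLOC inode            -> run_delalloc_nocow(), force = 0
 *	no compression requested  -> cow_file_range()
 *	compression requested     -> cow_file_range_async()
 */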
1477 :
1478 597391 : static void btrfs_split_extent_hook(struct inode *inode,
1479 : struct extent_state *orig, u64 split)
1480 : {
1481 : /* not delalloc, ignore it */
1482 597391 : if (!(orig->state & EXTENT_DELALLOC))
1483 597391 : return;
1484 :
1485 : spin_lock(&BTRFS_I(inode)->lock);
1486 27418 : BTRFS_I(inode)->outstanding_extents++;
1487 : spin_unlock(&BTRFS_I(inode)->lock);
1488 : }
1489 :
1490 : /*
1491 : * extent_io.c merge_extent_hook, used to track merged delayed allocation
1492 : * extents so we can keep track of new extents that are just merged onto old
1493 : * extents, such as when we are doing sequential writes, so we can properly
1494 : * account for the metadata space we'll need.
1495 : */
1496 555603 : static void btrfs_merge_extent_hook(struct inode *inode,
1497 : struct extent_state *new,
1498 : struct extent_state *other)
1499 : {
1500 : /* not delalloc, ignore it */
1501 555603 : if (!(other->state & EXTENT_DELALLOC))
1502 555603 : return;
1503 :
1504 : spin_lock(&BTRFS_I(inode)->lock);
1505 116028 : BTRFS_I(inode)->outstanding_extents--;
1506 : spin_unlock(&BTRFS_I(inode)->lock);
1507 : }
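/*
 * Example of the split/merge bookkeeping: a contiguous delalloc range is
 * counted as one outstanding extent for metadata reservation. When its
 * extent state is split (say, while operating on a sub-range) there are
 * now two candidate extents, so the count goes up; when two adjacent
 * delalloc states merge back together, the count comes back down.
 */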
1508 :
1509 48376 : static void btrfs_add_delalloc_inodes(struct btrfs_root *root,
1510 : struct inode *inode)
1511 : {
1512 : spin_lock(&root->delalloc_lock);
1513 96758 : if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
1514 48379 : list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
1515 : &root->delalloc_inodes);
1516 : set_bit(BTRFS_INODE_IN_DELALLOC_LIST,
1517 : &BTRFS_I(inode)->runtime_flags);
1518 48377 : root->nr_delalloc_inodes++;
1519 48377 : if (root->nr_delalloc_inodes == 1) {
1520 32365 : spin_lock(&root->fs_info->delalloc_root_lock);
1521 64734 : BUG_ON(!list_empty(&root->delalloc_root));
1522 32367 : list_add_tail(&root->delalloc_root,
1523 32367 : &root->fs_info->delalloc_roots);
1524 32367 : spin_unlock(&root->fs_info->delalloc_root_lock);
1525 : }
1526 : }
1527 : spin_unlock(&root->delalloc_lock);
1528 48378 : }
1529 :
1530 48378 : static void btrfs_del_delalloc_inode(struct btrfs_root *root,
1531 : struct inode *inode)
1532 : {
1533 : spin_lock(&root->delalloc_lock);
1534 96758 : if (!list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
1535 : list_del_init(&BTRFS_I(inode)->delalloc_inodes);
1536 : clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
1537 : &BTRFS_I(inode)->runtime_flags);
1538 48377 : root->nr_delalloc_inodes--;
1539 48377 : if (!root->nr_delalloc_inodes) {
1540 32366 : spin_lock(&root->fs_info->delalloc_root_lock);
1541 64734 : BUG_ON(list_empty(&root->delalloc_root));
1542 : list_del_init(&root->delalloc_root);
1543 32367 : spin_unlock(&root->fs_info->delalloc_root_lock);
1544 : }
1545 : }
1546 : spin_unlock(&root->delalloc_lock);
1547 48378 : }
1548 :
1549 : /*
1550 : * extent_io.c set_bit_hook, used to track delayed allocation
1551 : * bytes in this file, and to maintain the list of inodes that
1552 : * have pending delalloc work to be done.
1553 : */
1554 1083201 : static void btrfs_set_bit_hook(struct inode *inode,
1555 : struct extent_state *state, unsigned long *bits)
1556 : {
1557 :
1558 : /*
1559 : * set_bit and clear_bit hooks normally require _irqsave/restore
1560 : * but in this case, we are only testing for the DELALLOC
1561 : * bit, which is only set or cleared with irqs on
1562 : */
1563 1083201 : if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
1564 178033 : struct btrfs_root *root = BTRFS_I(inode)->root;
1565 178033 : u64 len = state->end + 1 - state->start;
1566 178033 : bool do_list = !btrfs_is_free_space_inode(inode);
1567 :
1568 178032 : if (*bits & EXTENT_FIRST_DELALLOC) {
1569 158382 : *bits &= ~EXTENT_FIRST_DELALLOC;
1570 : } else {
1571 : spin_lock(&BTRFS_I(inode)->lock);
1572 19650 : BTRFS_I(inode)->outstanding_extents++;
1573 : spin_unlock(&BTRFS_I(inode)->lock);
1574 : }
1575 :
1576 178032 : __percpu_counter_add(&root->fs_info->delalloc_bytes, len,
1577 178032 : root->fs_info->delalloc_batch);
1578 : spin_lock(&BTRFS_I(inode)->lock);
1579 178034 : BTRFS_I(inode)->delalloc_bytes += len;
1580 352013 : if (do_list && !test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
1581 : &BTRFS_I(inode)->runtime_flags))
1582 48376 : btrfs_add_delalloc_inodes(root, inode);
1583 : spin_unlock(&BTRFS_I(inode)->lock);
1584 : }
1585 1083205 : }
1586 :
1587 : /*
1588 : * extent_io.c clear_bit_hook, see set_bit_hook for why
1589 : */
1590 858660 : static void btrfs_clear_bit_hook(struct inode *inode,
1591 : struct extent_state *state,
1592 : unsigned long *bits)
1593 : {
1594 : /*
1595 : * set_bit and clear_bit hooks normally require _irqsave/restore
1596 : * but in this case, we are only testing for the DELALLOC
1597 : * bit, which is only set or cleared with irqs on
1598 : */
1599 858660 : if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
1600 89406 : struct btrfs_root *root = BTRFS_I(inode)->root;
1601 89406 : u64 len = state->end + 1 - state->start;
1602 89406 : bool do_list = !btrfs_is_free_space_inode(inode);
1603 :
1604 89409 : if (*bits & EXTENT_FIRST_DELALLOC) {
1605 89408 : *bits &= ~EXTENT_FIRST_DELALLOC;
1606 1 : } else if (!(*bits & EXTENT_DO_ACCOUNTING)) {
1607 : spin_lock(&BTRFS_I(inode)->lock);
1608 1 : BTRFS_I(inode)->outstanding_extents--;
1609 : spin_unlock(&BTRFS_I(inode)->lock);
1610 : }
1611 :
1612 : /*
1613 : * We don't reserve metadata space for space cache inodes so we
1614 : * don't need to call btrfs_delalloc_release_metadata if there is an
1615 : * error.
1616 : */
1617 123471 : if (*bits & EXTENT_DO_ACCOUNTING &&
1618 34063 : root != root->fs_info->tree_root)
1619 34063 : btrfs_delalloc_release_metadata(inode, len);
1620 :
1621 89408 : if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
1622 88386 : && do_list && !(state->state & EXTENT_NORESERVE))
1623 84330 : btrfs_free_reserved_data_space(inode, len);
1624 :
1625 89419 : __percpu_counter_add(&root->fs_info->delalloc_bytes, -len,
1626 89419 : root->fs_info->delalloc_batch);
1627 : spin_lock(&BTRFS_I(inode)->lock);
1628 89420 : BTRFS_I(inode)->delalloc_bytes -= len;
1629 137799 : if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 &&
1630 : test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
1631 : &BTRFS_I(inode)->runtime_flags))
1632 48378 : btrfs_del_delalloc_inode(root, inode);
1633 : spin_unlock(&BTRFS_I(inode)->lock);
1634 : }
1635 858673 : }
1636 :
1637 : /*
1638 : * extent_io.c merge_bio_hook, this must check the chunk tree to make sure
1639 : * we don't create bios that span stripes or chunks
1640 : */
1641 1495689 : int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset,
1642 : size_t size, struct bio *bio,
1643 : unsigned long bio_flags)
1644 : {
1645 1495689 : struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
1646 1495689 : u64 logical = (u64)bio->bi_iter.bi_sector << 9;
1647 : u64 length = 0;
1648 : u64 map_length;
1649 : int ret;
1650 :
1651 1495689 : if (bio_flags & EXTENT_BIO_COMPRESSED)
1652 : return 0;
1653 :
1654 1495399 : length = bio->bi_iter.bi_size;
1655 1495399 : map_length = length;
1656 1495399 : ret = btrfs_map_block(root->fs_info, rw, logical,
1657 : &map_length, NULL, 0);
1658 : /* Will always return 0 with map_multi == NULL */
1659 1495418 : BUG_ON(ret < 0);
1660 1495418 : if (map_length < length + size)
1661 : return 1;
1662 1447382 : return 0;
1663 : }
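: /*
:  * Worked example of the check above (illustrative numbers): a bio
:  * starting at logical 0 with bi_size 64K, where btrfs_map_block()
:  * reports map_length == 64K of contiguous mapping on one stripe,
:  * cannot take another 4K page because length + size == 68K would
:  * exceed map_length; the hook returns 1 and the caller starts a new
:  * bio rather than letting this one span stripes.
:  */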
1664 :
1665 : /*
1666 : * in order to insert checksums into the metadata in large chunks,
1667 : * we wait until bio submission time. All the pages in the bio are
1668 : * checksummed and sums are attached onto the ordered extent record.
1669 : *
1670 : * At IO completion time the csums attached to the ordered extent record
1671 : * are inserted into the btree
1672 : */
1673 59544 : static int __btrfs_submit_bio_start(struct inode *inode, int rw,
1674 : struct bio *bio, int mirror_num,
1675 : unsigned long bio_flags,
1676 : u64 bio_offset)
1677 : {
1678 59544 : struct btrfs_root *root = BTRFS_I(inode)->root;
1679 : int ret = 0;
1680 :
1681 59544 : ret = btrfs_csum_one_bio(root, inode, bio, 0, 0);
1682 59530 : BUG_ON(ret); /* -ENOMEM */
1683 59530 : return 0;
1684 : }
1685 :
1686 : /*
1687 : * the second phase of the async checksumming above: by the time this
1688 : * runs, the csums have already been computed and attached to the
1689 : * ordered extent record by __btrfs_submit_bio_start.
1690 : *
1691 : * All that is left here is to map the bio to the right device and
1692 : * submit it
1693 : */
1694 59547 : static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
1695 : int mirror_num, unsigned long bio_flags,
1696 : u64 bio_offset)
1697 : {
1698 59547 : struct btrfs_root *root = BTRFS_I(inode)->root;
1699 : int ret;
1700 :
1701 59547 : ret = btrfs_map_bio(root, rw, bio, mirror_num, 1);
1702 59547 : if (ret)
1703 0 : bio_endio(bio, ret);
1704 59547 : return ret;
1705 : }
1706 :
1707 : /*
1708 : * extent_io.c submission hook. This does the right thing for csum calculation
1709 : * on write, or reading the csums from the tree before a read
1710 : */
1711 92080 : static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
1712 : int mirror_num, unsigned long bio_flags,
1713 : u64 bio_offset)
1714 : {
1715 92080 : struct btrfs_root *root = BTRFS_I(inode)->root;
1716 : int ret = 0;
1717 : int skip_sum;
1718 : int metadata = 0;
1719 92080 : int async = !atomic_read(&BTRFS_I(inode)->sync_writers);
1720 :
1721 92080 : skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
1722 :
1723 92080 : if (btrfs_is_free_space_inode(inode))
1724 : metadata = 2;
1725 :
1726 92080 : if (!(rw & REQ_WRITE)) {
1727 26396 : ret = btrfs_bio_wq_end_io(root->fs_info, bio, metadata);
1728 26396 : if (ret)
1729 : goto out;
1730 :
1731 26396 : if (bio_flags & EXTENT_BIO_COMPRESSED) {
1732 42 : ret = btrfs_submit_compressed_read(inode, bio,
1733 : mirror_num,
1734 : bio_flags);
1735 42 : goto out;
1736 26354 : } else if (!skip_sum) {
1737 26053 : ret = btrfs_lookup_bio_sums(root, inode, bio, NULL);
1738 26053 : if (ret)
1739 : goto out;
1740 : }
1741 : goto mapit;
1742 65684 : } else if (async && !skip_sum) {
1743 : /* csum items have already been cloned */
1744 59559 : if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
1745 : goto mapit;
1746 : /* we're doing a write, do the async checksumming */
1747 59547 : ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
1748 : inode, rw, bio, mirror_num,
1749 : bio_flags, bio_offset,
1750 : __btrfs_submit_bio_start,
1751 : __btrfs_submit_bio_done);
1752 59547 : goto out;
1753 6125 : } else if (!skip_sum) {
1754 1609 : ret = btrfs_csum_one_bio(root, inode, bio, 0, 0);
1755 1609 : if (ret)
1756 : goto out;
1757 : }
1758 :
1759 : mapit:
1760 32491 : ret = btrfs_map_bio(root, rw, bio, mirror_num, 0);
1761 :
1762 : out:
1763 92080 : if (ret < 0)
1764 0 : bio_endio(bio, ret);
1765 92080 : return ret;
1766 : }
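: /*
:  * A minimal sketch of the two-phase async pattern used above
:  * (hypothetical toy_* names): btrfs_wq_submit_bio() hands the bio to a
:  * helper thread, which runs the "start" callback (checksumming, which
:  * may block) and then the "done" callback (the actual submission), so
:  * the writeback context never waits on checksum computation itself.
:  */
: #if 0
: struct toy_async_bio {
: 	struct bio *bio;
: 	int (*start)(struct bio *bio);	/* e.g. csum all the pages */
: 	int (*done)(struct bio *bio);	/* e.g. map and submit     */
: };
:
: static void toy_async_worker(struct toy_async_bio *w)
: {
: 	if (!w->start(w->bio))
: 		w->done(w->bio);
: }
: #endif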
1767 :
1768 : /*
1769 : * given a list of ordered sums, record them in the inode. This happens
1770 : * at IO completion time based on sums calculated at bio submission time.
1771 : */
1772 51414 : static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
1773 : struct inode *inode, u64 file_offset,
1774 : struct list_head *list)
1775 : {
1776 : struct btrfs_ordered_sum *sum;
1777 :
1778 142297 : list_for_each_entry(sum, list, list) {
1779 90874 : trans->adding_csums = 1;
1780 90874 : btrfs_csum_file_blocks(trans,
1781 90874 : BTRFS_I(inode)->root->fs_info->csum_root, sum);
1782 90883 : trans->adding_csums = 0;
1783 : }
1784 51423 : return 0;
1785 : }
1786 :
1787 131791 : int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
1788 : struct extent_state **cached_state)
1789 : {
1790 131791 : WARN_ON((end & (PAGE_CACHE_SIZE - 1)) == 0);
1791 131791 : return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end,
1792 : cached_state, GFP_NOFS);
1793 : }
1794 :
1795 : /* see btrfs_writepage_start_hook for details on why this is required */
1796 : struct btrfs_writepage_fixup {
1797 : struct page *page;
1798 : struct btrfs_work work;
1799 : };
1800 :
1801 0 : static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
1802 : {
1803 : struct btrfs_writepage_fixup *fixup;
1804 : struct btrfs_ordered_extent *ordered;
1805 0 : struct extent_state *cached_state = NULL;
1806 0 : struct page *page;
1807 : struct inode *inode;
1808 : u64 page_start;
1809 : u64 page_end;
1810 : int ret;
1811 :
1812 0 : fixup = container_of(work, struct btrfs_writepage_fixup, work);
1813 0 : page = fixup->page;
1814 : again:
1815 0 : lock_page(page);
1816 0 : if (!page->mapping || !PageDirty(page) || !PageChecked(page)) {
1817 : ClearPageChecked(page);
1818 : goto out_page;
1819 : }
1820 :
1821 0 : inode = page->mapping->host;
1822 0 : page_start = page_offset(page);
1823 0 : page_end = page_offset(page) + PAGE_CACHE_SIZE - 1;
1824 :
1825 0 : lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, 0,
1826 : &cached_state);
1827 :
1828 : /* already ordered? We're done */
1829 0 : if (PagePrivate2(page))
1830 : goto out;
1831 :
1832 0 : ordered = btrfs_lookup_ordered_extent(inode, page_start);
1833 0 : if (ordered) {
1834 0 : unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start,
1835 : page_end, &cached_state, GFP_NOFS);
1836 0 : unlock_page(page);
1837 0 : btrfs_start_ordered_extent(inode, ordered, 1);
1838 0 : btrfs_put_ordered_extent(ordered);
1839 0 : goto again;
1840 : }
1841 :
1842 0 : ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
1843 0 : if (ret) {
1844 0 : mapping_set_error(page->mapping, ret);
1845 0 : end_extent_writepage(page, ret, page_start, page_end);
1846 : ClearPageChecked(page);
1847 : goto out;
1848 : }
1849 :
1850 0 : btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state);
1851 : ClearPageChecked(page);
1852 0 : set_page_dirty(page);
1853 : out:
1854 0 : unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end,
1855 : &cached_state, GFP_NOFS);
1856 : out_page:
1857 0 : unlock_page(page);
1858 0 : page_cache_release(page);
1859 0 : kfree(fixup);
1860 0 : }
1861 :
1862 : /*
1863 : * There are a few paths in the higher layers of the kernel that directly
1864 : * set the page dirty bit without asking the filesystem if it is a
1865 : * good idea. This causes problems because we want to make sure COW
1866 : * properly happens and the data=ordered rules are followed.
1867 : *
1868 : * In our case any range that doesn't have the ORDERED bit set
1869 : * hasn't been properly set up for IO. We kick off an async process
1870 : * to fix it up. The async helper will wait for ordered extents, set
1871 : * the delalloc bit and make it safe to write the page.
1872 : */
1873 1308900 : static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end)
1874 : {
1875 1308900 : struct inode *inode = page->mapping->host;
1876 : struct btrfs_writepage_fixup *fixup;
1877 1308900 : struct btrfs_root *root = BTRFS_I(inode)->root;
1878 :
1879 : /* this page is properly in the ordered list */
1880 1308918 : if (TestClearPagePrivate2(page))
1881 : return 0;
1882 :
1883 0 : if (PageChecked(page))
1884 : return -EAGAIN;
1885 :
1886 0 : fixup = kzalloc(sizeof(*fixup), GFP_NOFS);
1887 0 : if (!fixup)
1888 : return -EAGAIN;
1889 :
1890 : SetPageChecked(page);
1891 0 : page_cache_get(page);
1892 0 : btrfs_init_work(&fixup->work, btrfs_fixup_helper,
1893 : btrfs_writepage_fixup_worker, NULL, NULL);
1894 0 : fixup->page = page;
1895 0 : btrfs_queue_work(root->fs_info->fixup_workers, &fixup->work);
1896 0 : return -EBUSY;
1897 : }
1898 :
1899 53371 : static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1900 : struct inode *inode, u64 file_pos,
1901 : u64 disk_bytenr, u64 disk_num_bytes,
1902 : u64 num_bytes, u64 ram_bytes,
1903 : u8 compression, u8 encryption,
1904 : u16 other_encoding, int extent_type)
1905 : {
1906 53371 : struct btrfs_root *root = BTRFS_I(inode)->root;
1907 : struct btrfs_file_extent_item *fi;
1908 : struct btrfs_path *path;
1909 : struct extent_buffer *leaf;
1910 : struct btrfs_key ins;
1911 53371 : int extent_inserted = 0;
1912 : int ret;
1913 :
1914 53371 : path = btrfs_alloc_path();
1915 53374 : if (!path)
1916 : return -ENOMEM;
1917 :
1918 : /*
1919 : * we may be replacing one extent in the tree with another.
1920 : * The new extent is pinned in the extent map, and we don't want
1921 : * to drop it from the cache until it is completely in the btree.
1922 : *
1923 : * So, tell btrfs_drop_extents to leave this extent in the cache.
1924 : * the caller is expected to unpin it and allow it to be merged
1925 : * with the others.
1926 : */
1927 53373 : ret = __btrfs_drop_extents(trans, root, inode, path, file_pos,
1928 : file_pos + num_bytes, NULL, 0,
1929 : 1, sizeof(*fi), &extent_inserted);
1930 53376 : if (ret)
1931 : goto out;
1932 :
1933 53375 : if (!extent_inserted) {
1934 5148 : ins.objectid = btrfs_ino(inode);
1935 5148 : ins.offset = file_pos;
1936 5148 : ins.type = BTRFS_EXTENT_DATA_KEY;
1937 :
1938 5148 : path->leave_spinning = 1;
1939 : ret = btrfs_insert_empty_item(trans, root, path, &ins,
1940 : sizeof(*fi));
1941 5149 : if (ret)
1942 : goto out;
1943 : }
1944 53376 : leaf = path->nodes[0];
1945 106751 : fi = btrfs_item_ptr(leaf, path->slots[0],
1946 : struct btrfs_file_extent_item);
1947 53375 : btrfs_set_file_extent_generation(leaf, fi, trans->transid);
1948 53373 : btrfs_set_file_extent_type(leaf, fi, extent_type);
1949 : btrfs_set_file_extent_disk_bytenr(leaf, fi, disk_bytenr);
1950 : btrfs_set_file_extent_disk_num_bytes(leaf, fi, disk_num_bytes);
1951 : btrfs_set_file_extent_offset(leaf, fi, 0);
1952 : btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
1953 : btrfs_set_file_extent_ram_bytes(leaf, fi, ram_bytes);
1954 : btrfs_set_file_extent_compression(leaf, fi, compression);
1955 : btrfs_set_file_extent_encryption(leaf, fi, encryption);
1956 : btrfs_set_file_extent_other_encoding(leaf, fi, other_encoding);
1957 :
1958 53374 : btrfs_mark_buffer_dirty(leaf);
1959 53380 : btrfs_release_path(path);
1960 :
1961 53372 : inode_add_bytes(inode, num_bytes);
1962 :
1963 53378 : ins.objectid = disk_bytenr;
1964 53378 : ins.offset = disk_num_bytes;
1965 53378 : ins.type = BTRFS_EXTENT_ITEM_KEY;
1966 53378 : ret = btrfs_alloc_reserved_file_extent(trans, root,
1967 : root->root_key.objectid,
1968 : btrfs_ino(inode), file_pos, &ins);
1969 : out:
1970 53381 : btrfs_free_path(path);
1971 :
1972 53379 : return ret;
1973 : }
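: /*
:  * Note how 'ins' is reused above: first as the file-tree key
:  * (ino, EXTENT_DATA, file_pos) for the new file extent item, then as
:  * the extent-tree key (disk_bytenr, EXTENT_ITEM, disk_num_bytes) when
:  * the reserved extent is accounted.
:  */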
1974 :
1975 : /* snapshot-aware defrag */
1976 : struct sa_defrag_extent_backref {
1977 : struct rb_node node;
1978 : struct old_sa_defrag_extent *old;
1979 : u64 root_id;
1980 : u64 inum;
1981 : u64 file_pos;
1982 : u64 extent_offset;
1983 : u64 num_bytes;
1984 : u64 generation;
1985 : };
1986 :
1987 : struct old_sa_defrag_extent {
1988 : struct list_head list;
1989 : struct new_sa_defrag_extent *new;
1990 :
1991 : u64 extent_offset;
1992 : u64 bytenr;
1993 : u64 offset;
1994 : u64 len;
1995 : int count;
1996 : };
1997 :
1998 : struct new_sa_defrag_extent {
1999 : struct rb_root root;
2000 : struct list_head head;
2001 : struct btrfs_path *path;
2002 : struct inode *inode;
2003 : u64 file_pos;
2004 : u64 len;
2005 : u64 bytenr;
2006 : u64 disk_len;
2007 : u8 compress_type;
2008 : };
2009 :
2010 : static int backref_comp(struct sa_defrag_extent_backref *b1,
2011 : struct sa_defrag_extent_backref *b2)
2012 : {
2013 : if (b1->root_id < b2->root_id)
2014 : return -1;
2015 : else if (b1->root_id > b2->root_id)
2016 : return 1;
2017 :
2018 : if (b1->inum < b2->inum)
2019 : return -1;
2020 : else if (b1->inum > b2->inum)
2021 : return 1;
2022 :
2023 : if (b1->file_pos < b2->file_pos)
2024 : return -1;
2025 : else if (b1->file_pos > b2->file_pos)
2026 : return 1;
2027 :
2028 : /*
2029 : * [------------------------------] ===> (a range of space)
2030 : * |<--->| |<---->| =============> (fs/file tree A)
2031 : * |<---------------------------->| ===> (fs/file tree B)
2032 : *
2033 : * A range of space can refer to two file extents in one tree while
2034 : * refer to only one file extent in another tree.
2035 : *
2036 : * So we may process a disk offset more than one time(two extents in A)
2037 : * and locate at the same extent(one extent in B), then insert two same
2038 : * backrefs(both refer to the extent in B).
2039 : */
2040 : return 0;
2041 : }
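: /*
:  * Example of the resulting ordering (illustrative values): backrefs
:  * sort lexicographically by (root_id, inum, file_pos), so
:  *   {root 5, ino 257, pos 0}    <  {root 5, ino 257, pos 4096}
:  *   {root 5, ino 257, pos 4096} <  {root 5, ino 258, pos 0}
:  * and two backrefs with all three fields equal compare as 0; as noted
:  * above such duplicates are legitimate, and backref_insert() below
:  * simply links them adjacent to each other in the rb-tree.
:  */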
2042 :
2043 : static void backref_insert(struct rb_root *root,
2044 : struct sa_defrag_extent_backref *backref)
2045 : {
2046 : struct rb_node **p = &root->rb_node;
2047 : struct rb_node *parent = NULL;
2048 : struct sa_defrag_extent_backref *entry;
2049 : int ret;
2050 :
2051 : while (*p) {
2052 : parent = *p;
2053 : entry = rb_entry(parent, struct sa_defrag_extent_backref, node);
2054 :
2055 : ret = backref_comp(backref, entry);
2056 : if (ret < 0)
2057 : p = &(*p)->rb_left;
2058 : else
2059 : p = &(*p)->rb_right;
2060 : }
2061 :
2062 : rb_link_node(&backref->node, parent, p);
2063 : rb_insert_color(&backref->node, root);
2064 : }
2065 :
2066 : /*
2067 : * Note the backref might have changed, and in this case we just return 0.
2068 : */
2069 : static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
2070 : void *ctx)
2071 : {
2072 : struct btrfs_file_extent_item *extent;
2073 : struct btrfs_fs_info *fs_info;
2074 : struct old_sa_defrag_extent *old = ctx;
2075 : struct new_sa_defrag_extent *new = old->new;
2076 : struct btrfs_path *path = new->path;
2077 : struct btrfs_key key;
2078 : struct btrfs_root *root;
2079 : struct sa_defrag_extent_backref *backref;
2080 : struct extent_buffer *leaf;
2081 : struct inode *inode = new->inode;
2082 : int slot;
2083 : int ret;
2084 : u64 extent_offset;
2085 : u64 num_bytes;
2086 :
2087 : if (BTRFS_I(inode)->root->root_key.objectid == root_id &&
2088 : inum == btrfs_ino(inode))
2089 : return 0;
2090 :
2091 : key.objectid = root_id;
2092 : key.type = BTRFS_ROOT_ITEM_KEY;
2093 : key.offset = (u64)-1;
2094 :
2095 : fs_info = BTRFS_I(inode)->root->fs_info;
2096 : root = btrfs_read_fs_root_no_name(fs_info, &key);
2097 : if (IS_ERR(root)) {
2098 : if (PTR_ERR(root) == -ENOENT)
2099 : return 0;
2100 : WARN_ON(1);
2101 : pr_debug("inum=%llu, offset=%llu, root_id=%llu\n",
2102 : inum, offset, root_id);
2103 : return PTR_ERR(root);
2104 : }
2105 :
2106 : key.objectid = inum;
2107 : key.type = BTRFS_EXTENT_DATA_KEY;
2108 : if (offset > (u64)-1 << 32)
2109 : key.offset = 0;
2110 : else
2111 : key.offset = offset;
2112 :
2113 : ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2114 : if (WARN_ON(ret < 0))
2115 : return ret;
2116 : ret = 0;
2117 :
2118 : while (1) {
2119 : cond_resched();
2120 :
2121 : leaf = path->nodes[0];
2122 : slot = path->slots[0];
2123 :
2124 : if (slot >= btrfs_header_nritems(leaf)) {
2125 : ret = btrfs_next_leaf(root, path);
2126 : if (ret < 0) {
2127 : goto out;
2128 : } else if (ret > 0) {
2129 : ret = 0;
2130 : goto out;
2131 : }
2132 : continue;
2133 : }
2134 :
2135 : path->slots[0]++;
2136 :
2137 : btrfs_item_key_to_cpu(leaf, &key, slot);
2138 :
2139 : if (key.objectid > inum)
2140 : goto out;
2141 :
2142 : if (key.objectid < inum || key.type != BTRFS_EXTENT_DATA_KEY)
2143 : continue;
2144 :
2145 : extent = btrfs_item_ptr(leaf, slot,
2146 : struct btrfs_file_extent_item);
2147 :
2148 : if (btrfs_file_extent_disk_bytenr(leaf, extent) != old->bytenr)
2149 : continue;
2150 :
2151 : /*
2152 : * 'offset' refers to the exact key.offset,
2153 : * NOT the 'offset' field in btrfs_extent_data_ref, i.e.
2154 : * (key.offset - extent_offset).
2155 : */
2156 : if (key.offset != offset)
2157 : continue;
2158 :
2159 : extent_offset = btrfs_file_extent_offset(leaf, extent);
2160 : num_bytes = btrfs_file_extent_num_bytes(leaf, extent);
2161 :
2162 : if (extent_offset >= old->extent_offset + old->offset +
2163 : old->len || extent_offset + num_bytes <=
2164 : old->extent_offset + old->offset)
2165 : continue;
2166 : break;
2167 : }
2168 :
2169 : backref = kmalloc(sizeof(*backref), GFP_NOFS);
2170 : if (!backref) {
2171 : ret = -ENOMEM;	/* allocation failure, not a missing item */
2172 : goto out;
2173 : }
2174 :
2175 : backref->root_id = root_id;
2176 : backref->inum = inum;
2177 : backref->file_pos = offset;
2178 : backref->num_bytes = num_bytes;
2179 : backref->extent_offset = extent_offset;
2180 : backref->generation = btrfs_file_extent_generation(leaf, extent);
2181 : backref->old = old;
2182 : backref_insert(&new->root, backref);
2183 : old->count++;
2184 : out:
2185 : btrfs_release_path(path);
2186 : WARN_ON(ret);
2187 : return ret;
2188 : }
2189 :
2190 : static noinline bool record_extent_backrefs(struct btrfs_path *path,
2191 : struct new_sa_defrag_extent *new)
2192 : {
2193 : struct btrfs_fs_info *fs_info = BTRFS_I(new->inode)->root->fs_info;
2194 : struct old_sa_defrag_extent *old, *tmp;
2195 : int ret;
2196 :
2197 : new->path = path;
2198 :
2199 : list_for_each_entry_safe(old, tmp, &new->head, list) {
2200 : ret = iterate_inodes_from_logical(old->bytenr +
2201 : old->extent_offset, fs_info,
2202 : path, record_one_backref,
2203 : old);
2204 : if (ret < 0 && ret != -ENOENT)
2205 : return false;
2206 :
2207 : /* no backref to be processed for this extent */
2208 : if (!old->count) {
2209 : list_del(&old->list);
2210 : kfree(old);
2211 : }
2212 : }
2213 :
2214 : if (list_empty(&new->head))
2215 : return false;
2216 :
2217 : return true;
2218 : }
2219 :
2220 : static int relink_is_mergable(struct extent_buffer *leaf,
2221 : struct btrfs_file_extent_item *fi,
2222 : struct new_sa_defrag_extent *new)
2223 : {
2224 : if (btrfs_file_extent_disk_bytenr(leaf, fi) != new->bytenr)
2225 : return 0;
2226 :
2227 : if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
2228 : return 0;
2229 :
2230 : if (btrfs_file_extent_compression(leaf, fi) != new->compress_type)
2231 : return 0;
2232 :
2233 : if (btrfs_file_extent_encryption(leaf, fi) ||
2234 : btrfs_file_extent_other_encoding(leaf, fi))
2235 : return 0;
2236 :
2237 : return 1;
2238 : }
2239 :
2240 : /*
2241 : * Note the backref might have changed, and in this case we just return 0.
2242 : */
2243 : static noinline int relink_extent_backref(struct btrfs_path *path,
2244 : struct sa_defrag_extent_backref *prev,
2245 : struct sa_defrag_extent_backref *backref)
2246 : {
2247 : struct btrfs_file_extent_item *extent;
2248 : struct btrfs_file_extent_item *item;
2249 : struct btrfs_ordered_extent *ordered;
2250 : struct btrfs_trans_handle *trans;
2251 : struct btrfs_fs_info *fs_info;
2252 : struct btrfs_root *root;
2253 : struct btrfs_key key;
2254 : struct extent_buffer *leaf;
2255 : struct old_sa_defrag_extent *old = backref->old;
2256 : struct new_sa_defrag_extent *new = old->new;
2257 : struct inode *src_inode = new->inode;
2258 : struct inode *inode;
2259 : struct extent_state *cached = NULL;
2260 : int ret = 0;
2261 : u64 start;
2262 : u64 len;
2263 : u64 lock_start;
2264 : u64 lock_end;
2265 : bool merge = false;
2266 : int index;
2267 :
2268 : if (prev && prev->root_id == backref->root_id &&
2269 : prev->inum == backref->inum &&
2270 : prev->file_pos + prev->num_bytes == backref->file_pos)
2271 : merge = true;
2272 :
2273 : /* step 1: get root */
2274 : key.objectid = backref->root_id;
2275 : key.type = BTRFS_ROOT_ITEM_KEY;
2276 : key.offset = (u64)-1;
2277 :
2278 : fs_info = BTRFS_I(src_inode)->root->fs_info;
2279 : index = srcu_read_lock(&fs_info->subvol_srcu);
2280 :
2281 : root = btrfs_read_fs_root_no_name(fs_info, &key);
2282 : if (IS_ERR(root)) {
2283 : srcu_read_unlock(&fs_info->subvol_srcu, index);
2284 : if (PTR_ERR(root) == -ENOENT)
2285 : return 0;
2286 : return PTR_ERR(root);
2287 : }
2288 :
2289 : if (btrfs_root_readonly(root)) {
2290 : srcu_read_unlock(&fs_info->subvol_srcu, index);
2291 : return 0;
2292 : }
2293 :
2294 : /* step 2: get inode */
2295 : key.objectid = backref->inum;
2296 : key.type = BTRFS_INODE_ITEM_KEY;
2297 : key.offset = 0;
2298 :
2299 : inode = btrfs_iget(fs_info->sb, &key, root, NULL);
2300 : if (IS_ERR(inode)) {
2301 : srcu_read_unlock(&fs_info->subvol_srcu, index);
2302 : return 0;
2303 : }
2304 :
2305 : srcu_read_unlock(&fs_info->subvol_srcu, index);
2306 :
2307 : /* step 3: relink backref */
2308 : lock_start = backref->file_pos;
2309 : lock_end = backref->file_pos + backref->num_bytes - 1;
2310 : lock_extent_bits(&BTRFS_I(inode)->io_tree, lock_start, lock_end,
2311 : 0, &cached);
2312 :
2313 : ordered = btrfs_lookup_first_ordered_extent(inode, lock_end);
2314 : if (ordered) {
2315 : btrfs_put_ordered_extent(ordered);
2316 : goto out_unlock;
2317 : }
2318 :
2319 : trans = btrfs_join_transaction(root);
2320 : if (IS_ERR(trans)) {
2321 : ret = PTR_ERR(trans);
2322 : goto out_unlock;
2323 : }
2324 :
2325 : key.objectid = backref->inum;
2326 : key.type = BTRFS_EXTENT_DATA_KEY;
2327 : key.offset = backref->file_pos;
2328 :
2329 : ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2330 : if (ret < 0) {
2331 : goto out_free_path;
2332 : } else if (ret > 0) {
2333 : ret = 0;
2334 : goto out_free_path;
2335 : }
2336 :
2337 : extent = btrfs_item_ptr(path->nodes[0], path->slots[0],
2338 : struct btrfs_file_extent_item);
2339 :
2340 : if (btrfs_file_extent_generation(path->nodes[0], extent) !=
2341 : backref->generation)
2342 : goto out_free_path;
2343 :
2344 : btrfs_release_path(path);
2345 :
2346 : start = backref->file_pos;
2347 : if (backref->extent_offset < old->extent_offset + old->offset)
2348 : start += old->extent_offset + old->offset -
2349 : backref->extent_offset;
2350 :
2351 : len = min(backref->extent_offset + backref->num_bytes,
2352 : old->extent_offset + old->offset + old->len);
2353 : len -= max(backref->extent_offset, old->extent_offset + old->offset);
2354 :
2355 : ret = btrfs_drop_extents(trans, root, inode, start,
2356 : start + len, 1);
2357 : if (ret)
2358 : goto out_free_path;
2359 : again:
2360 : key.objectid = btrfs_ino(inode);
2361 : key.type = BTRFS_EXTENT_DATA_KEY;
2362 : key.offset = start;
2363 :
2364 : path->leave_spinning = 1;
2365 : if (merge) {
2366 : struct btrfs_file_extent_item *fi;
2367 : u64 extent_len;
2368 : struct btrfs_key found_key;
2369 :
2370 : ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2371 : if (ret < 0)
2372 : goto out_free_path;
2373 :
2374 : path->slots[0]--;
2375 : leaf = path->nodes[0];
2376 : btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2377 :
2378 : fi = btrfs_item_ptr(leaf, path->slots[0],
2379 : struct btrfs_file_extent_item);
2380 : extent_len = btrfs_file_extent_num_bytes(leaf, fi);
2381 :
2382 : if (extent_len + found_key.offset == start &&
2383 : relink_is_mergable(leaf, fi, new)) {
2384 : btrfs_set_file_extent_num_bytes(leaf, fi,
2385 : extent_len + len);
2386 : btrfs_mark_buffer_dirty(leaf);
2387 : inode_add_bytes(inode, len);
2388 :
2389 : ret = 1;
2390 : goto out_free_path;
2391 : } else {
2392 : merge = false;
2393 : btrfs_release_path(path);
2394 : goto again;
2395 : }
2396 : }
2397 :
2398 : ret = btrfs_insert_empty_item(trans, root, path, &key,
2399 : sizeof(*extent));
2400 : if (ret) {
2401 : btrfs_abort_transaction(trans, root, ret);
2402 : goto out_free_path;
2403 : }
2404 :
2405 : leaf = path->nodes[0];
2406 : item = btrfs_item_ptr(leaf, path->slots[0],
2407 : struct btrfs_file_extent_item);
2408 : btrfs_set_file_extent_disk_bytenr(leaf, item, new->bytenr);
2409 : btrfs_set_file_extent_disk_num_bytes(leaf, item, new->disk_len);
2410 : btrfs_set_file_extent_offset(leaf, item, start - new->file_pos);
2411 : btrfs_set_file_extent_num_bytes(leaf, item, len);
2412 : btrfs_set_file_extent_ram_bytes(leaf, item, new->len);
2413 : btrfs_set_file_extent_generation(leaf, item, trans->transid);
2414 : btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG);
2415 : btrfs_set_file_extent_compression(leaf, item, new->compress_type);
2416 : btrfs_set_file_extent_encryption(leaf, item, 0);
2417 : btrfs_set_file_extent_other_encoding(leaf, item, 0);
2418 :
2419 : btrfs_mark_buffer_dirty(leaf);
2420 : inode_add_bytes(inode, len);
2421 : btrfs_release_path(path);
2422 :
2423 : ret = btrfs_inc_extent_ref(trans, root, new->bytenr,
2424 : new->disk_len, 0,
2425 : backref->root_id, backref->inum,
2426 : new->file_pos, 0); /* start - extent_offset */
2427 : if (ret) {
2428 : btrfs_abort_transaction(trans, root, ret);
2429 : goto out_free_path;
2430 : }
2431 :
2432 : ret = 1;
2433 : out_free_path:
2434 : btrfs_release_path(path);
2435 : path->leave_spinning = 0;
2436 : btrfs_end_transaction(trans, root);
2437 : out_unlock:
2438 : unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start, lock_end,
2439 : &cached, GFP_NOFS);
2440 : iput(inode);
2441 : return ret;
2442 : }
2443 :
2444 : static void free_sa_defrag_extent(struct new_sa_defrag_extent *new)
2445 : {
2446 : struct old_sa_defrag_extent *old, *tmp;
2447 :
2448 : if (!new)
2449 : return;
2450 :
2451 : list_for_each_entry_safe(old, tmp, &new->head, list) {
2452 : list_del(&old->list);
2453 : kfree(old);
2454 : }
2455 : kfree(new);
2456 : }
2457 :
2458 : static void relink_file_extents(struct new_sa_defrag_extent *new)
2459 : {
2460 : struct btrfs_path *path;
2461 : struct sa_defrag_extent_backref *backref;
2462 : struct sa_defrag_extent_backref *prev = NULL;
2463 : struct inode *inode;
2464 : struct btrfs_root *root;
2465 : struct rb_node *node;
2466 : int ret;
2467 :
2468 : inode = new->inode;
2469 : root = BTRFS_I(inode)->root;
2470 :
2471 : path = btrfs_alloc_path();
2472 : if (!path)
2473 : return;
2474 :
2475 : if (!record_extent_backrefs(path, new)) {
2476 : btrfs_free_path(path);
2477 : goto out;
2478 : }
2479 : btrfs_release_path(path);
2480 :
2481 : while (1) {
2482 : node = rb_first(&new->root);
2483 : if (!node)
2484 : break;
2485 : rb_erase(node, &new->root);
2486 :
2487 : backref = rb_entry(node, struct sa_defrag_extent_backref, node);
2488 :
2489 : ret = relink_extent_backref(path, prev, backref);
2490 : WARN_ON(ret < 0);
2491 :
2492 : kfree(prev);
2493 :
2494 : if (ret == 1)
2495 : prev = backref;
2496 : else
2497 : prev = NULL;
2498 : cond_resched();
2499 : }
2500 : kfree(prev);
2501 :
2502 : btrfs_free_path(path);
2503 : out:
2504 : free_sa_defrag_extent(new);
2505 :
2506 : atomic_dec(&root->fs_info->defrag_running);
2507 : wake_up(&root->fs_info->transaction_wait);
2508 : }
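: /*
:  * The loop above drains the rb-tree with the usual rb_first/rb_erase
:  * pattern while carrying the previously relinked backref in 'prev' so
:  * that adjacent backrefs can be merged.  The bare drain, with
:  * hypothetical types, looks like:
:  */
: #if 0
: while ((node = rb_first(&root)) != NULL) {
: 	rb_erase(node, &root);
: 	entry = rb_entry(node, struct toy_entry, node);
: 	process(entry);		/* node is already unlinked from the tree */
: }
: #endif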
2509 :
2510 : static struct new_sa_defrag_extent *
2511 : record_old_file_extents(struct inode *inode,
2512 : struct btrfs_ordered_extent *ordered)
2513 : {
2514 : struct btrfs_root *root = BTRFS_I(inode)->root;
2515 : struct btrfs_path *path;
2516 : struct btrfs_key key;
2517 : struct old_sa_defrag_extent *old;
2518 : struct new_sa_defrag_extent *new;
2519 : int ret;
2520 :
2521 : new = kmalloc(sizeof(*new), GFP_NOFS);
2522 : if (!new)
2523 : return NULL;
2524 :
2525 : new->inode = inode;
2526 : new->file_pos = ordered->file_offset;
2527 : new->len = ordered->len;
2528 : new->bytenr = ordered->start;
2529 : new->disk_len = ordered->disk_len;
2530 : new->compress_type = ordered->compress_type;
2531 : new->root = RB_ROOT;
2532 : INIT_LIST_HEAD(&new->head);
2533 :
2534 : path = btrfs_alloc_path();
2535 : if (!path)
2536 : goto out_kfree;
2537 :
2538 : key.objectid = btrfs_ino(inode);
2539 : key.type = BTRFS_EXTENT_DATA_KEY;
2540 : key.offset = new->file_pos;
2541 :
2542 : ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2543 : if (ret < 0)
2544 : goto out_free_path;
2545 : if (ret > 0 && path->slots[0] > 0)
2546 : path->slots[0]--;
2547 :
2548 : /* find out all the old extents for the file range */
2549 : while (1) {
2550 : struct btrfs_file_extent_item *extent;
2551 : struct extent_buffer *l;
2552 : int slot;
2553 : u64 num_bytes;
2554 : u64 offset;
2555 : u64 end;
2556 : u64 disk_bytenr;
2557 : u64 extent_offset;
2558 :
2559 : l = path->nodes[0];
2560 : slot = path->slots[0];
2561 :
2562 : if (slot >= btrfs_header_nritems(l)) {
2563 : ret = btrfs_next_leaf(root, path);
2564 : if (ret < 0)
2565 : goto out_free_path;
2566 : else if (ret > 0)
2567 : break;
2568 : continue;
2569 : }
2570 :
2571 : btrfs_item_key_to_cpu(l, &key, slot);
2572 :
2573 : if (key.objectid != btrfs_ino(inode))
2574 : break;
2575 : if (key.type != BTRFS_EXTENT_DATA_KEY)
2576 : break;
2577 : if (key.offset >= new->file_pos + new->len)
2578 : break;
2579 :
2580 : extent = btrfs_item_ptr(l, slot, struct btrfs_file_extent_item);
2581 :
2582 : num_bytes = btrfs_file_extent_num_bytes(l, extent);
2583 : if (key.offset + num_bytes < new->file_pos)
2584 : goto next;
2585 :
2586 : disk_bytenr = btrfs_file_extent_disk_bytenr(l, extent);
2587 : if (!disk_bytenr)
2588 : goto next;
2589 :
2590 : extent_offset = btrfs_file_extent_offset(l, extent);
2591 :
2592 : old = kmalloc(sizeof(*old), GFP_NOFS);
2593 : if (!old)
2594 : goto out_free_path;
2595 :
2596 : offset = max(new->file_pos, key.offset);
2597 : end = min(new->file_pos + new->len, key.offset + num_bytes);
2598 :
2599 : old->bytenr = disk_bytenr;
2600 : old->extent_offset = extent_offset;
2601 : old->offset = offset - key.offset;
2602 : old->len = end - offset;
2603 : old->new = new;
2604 : old->count = 0;
2605 : list_add_tail(&old->list, &new->head);
2606 : next:
2607 : path->slots[0]++;
2608 : cond_resched();
2609 : }
2610 :
2611 : btrfs_free_path(path);
2612 : atomic_inc(&root->fs_info->defrag_running);
2613 :
2614 : return new;
2615 :
2616 : out_free_path:
2617 : btrfs_free_path(path);
2618 : out_kfree:
2619 : free_sa_defrag_extent(new);
2620 : return NULL;
2621 : }
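: /*
:  * Worked example of the clamping above (illustrative numbers): for an
:  * ordered extent covering file range [8K, 24K) (new->file_pos = 8K,
:  * new->len = 16K) and an old file extent item at key.offset = 4K with
:  * num_bytes = 8K (covering [4K, 12K)), the overlap is
:  *   offset = max(8K, 4K)       = 8K
:  *   end    = min(24K, 4K + 8K) = 12K
:  * giving old->offset = 8K - 4K = 4K and old->len = 12K - 8K = 4K: only
:  * the part of the old extent the defragged range actually covers is
:  * recorded.
:  */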
2622 :
2623 45797 : static void btrfs_release_delalloc_bytes(struct btrfs_root *root,
2624 : u64 start, u64 len)
2625 : {
2626 : struct btrfs_block_group_cache *cache;
2627 :
2628 45797 : cache = btrfs_lookup_block_group(root->fs_info, start);
2629 : ASSERT(cache);
2630 :
2631 : spin_lock(&cache->lock);
2632 45798 : cache->delalloc_bytes -= len;
2633 : spin_unlock(&cache->lock);
2634 :
2635 45798 : btrfs_put_block_group(cache);
2636 45797 : }
2637 :
2638 : /* as ordered data IO finishes, this gets called so we can finish
2639 : * an ordered extent if the range of bytes in the file it covers is
2640 : * fully written.
2641 : */
2642 51505 : static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
2643 : {
2644 51505 : struct inode *inode = ordered_extent->inode;
2645 97302 : struct btrfs_root *root = BTRFS_I(inode)->root;
2646 : struct btrfs_trans_handle *trans = NULL;
2647 51505 : struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2648 51505 : struct extent_state *cached_state = NULL;
2649 : struct new_sa_defrag_extent *new = NULL;
2650 : int compress_type = 0;
2651 : int ret = 0;
2652 51505 : u64 logical_len = ordered_extent->len;
2653 : bool nolock;
2654 : bool truncated = false;
2655 :
2656 51505 : nolock = btrfs_is_free_space_inode(inode);
2657 :
2658 51505 : if (test_bit(BTRFS_ORDERED_IOERR, &ordered_extent->flags)) {
2659 : ret = -EIO;
2660 : goto out;
2661 : }
2662 :
2663 51505 : if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) {
2664 : truncated = true;
2665 0 : logical_len = ordered_extent->truncated_len;
2666 : /* Truncated the entire extent, don't bother adding */
2667 0 : if (!logical_len)
2668 : goto out;
2669 : }
2670 :
2671 51505 : if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
2672 164 : BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */
2673 82 : btrfs_ordered_update_i_size(inode, 0, ordered_extent);
2674 82 : if (nolock)
2675 82 : trans = btrfs_join_transaction_nolock(root);
2676 : else
2677 0 : trans = btrfs_join_transaction(root);
2678 82 : if (IS_ERR(trans)) {
2679 0 : ret = PTR_ERR(trans);
2680 : trans = NULL;
2681 0 : goto out;
2682 : }
2683 82 : trans->block_rsv = &root->fs_info->delalloc_block_rsv;
2684 82 : ret = btrfs_update_inode_fallback(trans, root, inode);
2685 82 : if (ret) /* -ENOMEM or corruption */
2686 0 : btrfs_abort_transaction(trans, root, ret);
2687 : goto out;
2688 : }
2689 :
2690 51423 : lock_extent_bits(io_tree, ordered_extent->file_offset,
2691 51423 : ordered_extent->file_offset + ordered_extent->len - 1,
2692 : 0, &cached_state);
2693 :
2694 102830 : ret = test_range_bit(io_tree, ordered_extent->file_offset,
2695 51415 : ordered_extent->file_offset + ordered_extent->len - 1,
2696 : EXTENT_DEFRAG, 1, cached_state);
2697 51419 : if (ret) {
2698 : u64 last_snapshot = btrfs_root_last_snapshot(&root->root_item);
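: /*
:  * Note: the "0 &&" in the test below appears to deliberately disable
:  * snapshot-aware defrag, so record_old_file_extents() is currently
:  * never reached from here.
:  */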
2699 : if (0 && last_snapshot >= BTRFS_I(inode)->generation)
2700 : /* the inode is shared */
2701 : new = record_old_file_extents(inode, ordered_extent);
2702 :
2703 66 : clear_extent_bit(io_tree, ordered_extent->file_offset,
2704 66 : ordered_extent->file_offset + ordered_extent->len - 1,
2705 : EXTENT_DEFRAG, 0, 0, &cached_state, GFP_NOFS);
2706 : }
2707 :
2708 51419 : if (nolock)
2709 3975 : trans = btrfs_join_transaction_nolock(root);
2710 : else
2711 47444 : trans = btrfs_join_transaction(root);
2712 51417 : if (IS_ERR(trans)) {
2713 0 : ret = PTR_ERR(trans);
2714 : trans = NULL;
2715 0 : goto out_unlock;
2716 : }
2717 :
2718 51417 : trans->block_rsv = &root->fs_info->delalloc_block_rsv;
2719 :
2720 51417 : if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
2721 153 : compress_type = ordered_extent->compress_type;
2722 51417 : if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
2723 5625 : BUG_ON(compress_type);
2724 5625 : ret = btrfs_mark_extent_written(trans, inode,
2725 : ordered_extent->file_offset,
2726 5625 : ordered_extent->file_offset +
2727 : logical_len);
2728 : } else {
2729 45792 : BUG_ON(root == root->fs_info->tree_root);
2730 45792 : ret = insert_reserved_file_extent(trans, inode,
2731 : ordered_extent->file_offset,
2732 : ordered_extent->start,
2733 : ordered_extent->disk_len,
2734 : logical_len, logical_len,
2735 : compress_type, 0, 0,
2736 : BTRFS_FILE_EXTENT_REG);
2737 45797 : if (!ret)
2738 91594 : btrfs_release_delalloc_bytes(root,
2739 : ordered_extent->start,
2740 : ordered_extent->disk_len);
2741 : }
2742 51422 : unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
2743 : ordered_extent->file_offset, ordered_extent->len,
2744 : trans->transid);
2745 51421 : if (ret < 0) {
2746 0 : btrfs_abort_transaction(trans, root, ret);
2747 0 : goto out_unlock;
2748 : }
2749 :
2750 51421 : add_pending_csums(trans, inode, ordered_extent->file_offset,
2751 : &ordered_extent->list);
2752 :
2753 51423 : btrfs_ordered_update_i_size(inode, 0, ordered_extent);
2754 51423 : ret = btrfs_update_inode_fallback(trans, root, inode);
2755 51422 : if (ret) { /* -ENOMEM or corruption */
2756 0 : btrfs_abort_transaction(trans, root, ret);
2757 0 : goto out_unlock;
2758 : }
2759 : ret = 0;
2760 : out_unlock:
2761 51422 : unlock_extent_cached(io_tree, ordered_extent->file_offset,
2762 102844 : ordered_extent->file_offset +
2763 51422 : ordered_extent->len - 1, &cached_state, GFP_NOFS);
2764 : out:
2765 51504 : if (root != root->fs_info->tree_root)
2766 47447 : btrfs_delalloc_release_metadata(inode, ordered_extent->len);
2767 51505 : if (trans)
2768 51505 : btrfs_end_transaction(trans, root);
2769 :
2770 51505 : if (ret || truncated) {
2771 : u64 start, end;
2772 :
2773 0 : if (truncated)
2774 0 : start = ordered_extent->file_offset + logical_len;
2775 : else
2776 0 : start = ordered_extent->file_offset;
2777 0 : end = ordered_extent->file_offset + ordered_extent->len - 1;
2778 0 : clear_extent_uptodate(io_tree, start, end, NULL, GFP_NOFS);
2779 :
2780 : /* Drop the cache for the part of the extent we didn't write. */
2781 0 : btrfs_drop_extent_cache(inode, start, end, 0);
2782 :
2783 : /*
2784 : * If the ordered extent had an IOERR or something else went
2785 : * wrong we need to return the space for this ordered extent
2786 : * back to the allocator. We only free the extent in the
2787 : * truncated case if we didn't write out the extent at all.
2788 : */
2789 0 : if ((ret || !logical_len) &&
2790 0 : !test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
2791 : !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags))
2792 0 : btrfs_free_reserved_extent(root, ordered_extent->start,
2793 : ordered_extent->disk_len, 1);
2794 : }
2795 :
2796 :
2797 : /*
2798 : * This needs to be done to make sure anybody waiting knows we are done
2799 : * updating everything for this ordered extent.
2800 : */
2801 51505 : btrfs_remove_ordered_extent(inode, ordered_extent);
2802 :
2803 : /* for snapshot-aware defrag */
2804 : if (new) {
2805 : if (ret) {
2806 : free_sa_defrag_extent(new);
2807 : atomic_dec(&root->fs_info->defrag_running);
2808 : } else {
2809 : relink_file_extents(new);
2810 : }
2811 : }
2812 :
2813 : /* once for us */
2814 51505 : btrfs_put_ordered_extent(ordered_extent);
2815 : /* once for the tree */
2816 51505 : btrfs_put_ordered_extent(ordered_extent);
2817 :
2818 51505 : return ret;
2819 : }
2820 :
2821 51505 : static void finish_ordered_fn(struct btrfs_work *work)
2822 : {
2823 : struct btrfs_ordered_extent *ordered_extent;
2824 51505 : ordered_extent = container_of(work, struct btrfs_ordered_extent, work);
2825 51505 : btrfs_finish_ordered_io(ordered_extent);
2826 51505 : }
2827 :
2828 1309032 : static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
2829 : struct extent_state *state, int uptodate)
2830 : {
2831 1309032 : struct inode *inode = page->mapping->host;
2832 1309032 : struct btrfs_root *root = BTRFS_I(inode)->root;
2833 1309032 : struct btrfs_ordered_extent *ordered_extent = NULL;
2834 : struct btrfs_workqueue *wq;
2835 : btrfs_work_func_t func;
2836 :
2837 1309032 : trace_btrfs_writepage_end_io_hook(page, start, end, uptodate);
2838 :
2839 : ClearPagePrivate2(page);
2840 1309063 : if (!btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
2841 1309063 : end - start + 1, uptodate))
2842 : return 0;
2843 :
2844 26245 : if (btrfs_is_free_space_inode(inode)) {
2845 4057 : wq = root->fs_info->endio_freespace_worker;
2846 : func = btrfs_freespace_write_helper;
2847 : } else {
2848 22188 : wq = root->fs_info->endio_write_workers;
2849 : func = btrfs_endio_write_helper;
2850 : }
2851 :
2852 26245 : btrfs_init_work(&ordered_extent->work, func, finish_ordered_fn, NULL,
2853 : NULL);
2854 26245 : btrfs_queue_work(wq, &ordered_extent->work);
2855 :
2856 26245 : return 0;
2857 : }
2858 :
2859 : /*
2860 : * when reads are done, we need to check csums to verify the data is correct
2861 : * if there's a match, we allow the bio to finish. If not, the code in
2862 : * extent_io.c will try to find good copies for us.
2863 : */
2864 72320 : static int btrfs_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
2865 72320 : u64 phy_offset, struct page *page,
2866 : u64 start, u64 end, int mirror)
2867 : {
2868 72320 : size_t offset = start - page_offset(page);
2869 72320 : struct inode *inode = page->mapping->host;
2870 72320 : struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2871 : char *kaddr;
2872 72320 : struct btrfs_root *root = BTRFS_I(inode)->root;
2873 : u32 csum_expected;
2874 72320 : u32 csum = ~(u32)0;
2875 : static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
2876 : DEFAULT_RATELIMIT_BURST);
2877 :
2878 72320 : if (PageChecked(page)) {
2879 : ClearPageChecked(page);
2880 : goto good;
2881 : }
2882 :
2883 71988 : if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
2884 : goto good;
2885 :
2886 63248 : if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
2887 1276 : test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) {
2888 254 : clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM,
2889 : GFP_NOFS);
2890 254 : return 0;
2891 : }
2892 :
2893 61718 : phy_offset >>= inode->i_sb->s_blocksize_bits;
2894 61718 : csum_expected = *(((u32 *)io_bio->csum) + phy_offset);
2895 :
2896 : kaddr = kmap_atomic(page);
2897 61711 : csum = btrfs_csum_data(kaddr + offset, csum, end - start + 1);
2898 61658 : btrfs_csum_final(csum, (char *)&csum);
2899 61659 : if (csum != csum_expected)
2900 : goto zeroit;
2901 :
2902 : kunmap_atomic(kaddr);
2903 : good:
2904 : return 0;
2905 :
2906 : zeroit:
2907 0 : if (__ratelimit(&_rs))
2908 0 : btrfs_info(root->fs_info, "csum failed ino %llu off %llu csum %u expected csum %u",
2909 : btrfs_ino(page->mapping->host), start, csum, csum_expected);
2910 0 : memset(kaddr + offset, 1, end - start + 1);
2911 : flush_dcache_page(page);
2912 : kunmap_atomic(kaddr);
2913 0 : if (csum_expected == 0)
2914 : return 0;
2915 0 : return -EIO;
2916 : }
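: /*
:  * A user-space flavoured sketch of the per-block verification above
:  * (hypothetical toy_* names; btrfs really uses crc32c through
:  * btrfs_csum_data/btrfs_csum_final): the bio carries one u32 csum per
:  * filesystem block, indexed by phy_offset >> blocksize_bits exactly as
:  * in the code.
:  */
: #if 0
: static int toy_verify_block(const u8 *data, size_t blocksize,
: 			    const u32 *csums, size_t block_index)
: {
: 	u32 csum = toy_crc32c(~(u32)0, data, blocksize);	/* assumed helper */
:
: 	return csum == csums[block_index] ? 0 : -EIO;
: }
: #endif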
2917 :
2918 : struct delayed_iput {
2919 : struct list_head list;
2920 : struct inode *inode;
2921 : };
2922 :
2923 : /* JDM: If this is fs-wide, why can't we add a pointer to
2924 : * btrfs_inode instead and avoid the allocation? */
2925 86695 : void btrfs_add_delayed_iput(struct inode *inode)
2926 : {
2927 86695 : struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
2928 : struct delayed_iput *delayed;
2929 :
2930 86702 : if (atomic_add_unless(&inode->i_count, -1, 1))
2931 86702 : return;
2932 :
2933 : delayed = kmalloc(sizeof(*delayed), GFP_NOFS | __GFP_NOFAIL);
2934 581 : delayed->inode = inode;
2935 :
2936 : spin_lock(&fs_info->delayed_iput_lock);
2937 581 : list_add_tail(&delayed->list, &fs_info->delayed_iputs);
2938 : spin_unlock(&fs_info->delayed_iput_lock);
2939 : }
2940 :
2941 6133 : void btrfs_run_delayed_iputs(struct btrfs_root *root)
2942 : {
2943 6133 : LIST_HEAD(list);
2944 6133 : struct btrfs_fs_info *fs_info = root->fs_info;
2945 : struct delayed_iput *delayed;
2946 : int empty;
2947 :
2948 : spin_lock(&fs_info->delayed_iput_lock);
2949 6133 : empty = list_empty(&fs_info->delayed_iputs);
2950 : spin_unlock(&fs_info->delayed_iput_lock);
2951 6133 : if (empty)
2952 5961 : return;
2953 :
2954 : spin_lock(&fs_info->delayed_iput_lock);
2955 : list_splice_init(&fs_info->delayed_iputs, &list);
2956 : spin_unlock(&fs_info->delayed_iput_lock);
2957 :
2958 753 : while (!list_empty(&list)) {
2959 : delayed = list_entry(list.next, struct delayed_iput, list);
2960 581 : list_del(&delayed->list);
2961 581 : iput(delayed->inode);
2962 581 : kfree(delayed);
2963 : }
2964 : }
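: /*
:  * The function above is the standard splice-under-lock pattern: move
:  * the whole shared list onto a private head while holding the lock,
:  * then walk it without the lock.  In isolation (shared_lock,
:  * shared_list and toy_item are hypothetical):
:  */
: #if 0
: LIST_HEAD(list);
: struct toy_item *entry;
:
: spin_lock(&shared_lock);
: list_splice_init(&shared_list, &list);
: spin_unlock(&shared_lock);
:
: while (!list_empty(&list)) {
: 	entry = list_first_entry(&list, struct toy_item, list);
: 	list_del(&entry->list);
: 	consume(entry);		/* safe: nobody else sees 'list' */
: }
: #endif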
2965 :
2966 : /*
2967 : * This is called in transaction commit time. If there are no orphan
2968 : * files in the subvolume, it removes orphan item and frees block_rsv
2969 : * structure.
2970 : */
2971 2548 : void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
2972 : struct btrfs_root *root)
2973 : {
2974 : struct btrfs_block_rsv *block_rsv;
2975 : int ret;
2976 :
2977 4858 : if (atomic_read(&root->orphan_inodes) ||
2978 2310 : root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE)
2979 : return;
2980 :
2981 : spin_lock(&root->orphan_lock);
2982 2259 : if (atomic_read(&root->orphan_inodes)) {
2983 : spin_unlock(&root->orphan_lock);
2984 : return;
2985 : }
2986 :
2987 2259 : if (root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE) {
2988 : spin_unlock(&root->orphan_lock);
2989 : return;
2990 : }
2991 :
2992 2259 : block_rsv = root->orphan_block_rsv;
2993 2259 : root->orphan_block_rsv = NULL;
2994 : spin_unlock(&root->orphan_lock);
2995 :
2996 3063 : if (test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state) &&
2997 : btrfs_root_refs(&root->root_item) > 0) {
2998 0 : ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root,
2999 : root->root_key.objectid);
3000 0 : if (ret)
3001 0 : btrfs_abort_transaction(trans, root, ret);
3002 : else
3003 : clear_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED,
3004 : &root->state);
3005 : }
3006 :
3007 2259 : if (block_rsv) {
3008 511 : WARN_ON(block_rsv->size > 0);
3009 511 : btrfs_free_block_rsv(root, block_rsv);
3010 : }
3011 : }
3012 :
3013 : /*
3014 : * This creates an orphan entry for the given inode in case something goes
3015 : * wrong in the middle of an unlink/truncate.
3016 : *
3017 : * NOTE: the caller of this function should reserve 5 units of metadata
3018 : * before calling it.
3019 : */
3020 9630 : int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
3021 : {
3022 9630 : struct btrfs_root *root = BTRFS_I(inode)->root;
3023 : struct btrfs_block_rsv *block_rsv = NULL;
3024 : int reserve = 0;
3025 : int insert = 0;
3026 : int ret;
3027 :
3028 9630 : if (!root->orphan_block_rsv) {
3029 513 : block_rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP);
3030 513 : if (!block_rsv)
3031 : return -ENOMEM;
3032 : }
3033 :
3034 : spin_lock(&root->orphan_lock);
3035 9630 : if (!root->orphan_block_rsv) {
3036 513 : root->orphan_block_rsv = block_rsv;
3037 9117 : } else if (block_rsv) {
3038 0 : btrfs_free_block_rsv(root, block_rsv);
3039 : block_rsv = NULL;
3040 : }
3041 :
3042 9630 : if (!test_and_set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
3043 9630 : &BTRFS_I(inode)->runtime_flags)) {
3044 : #if 0
3045 : /*
3046 : * For proper ENOSPC handling, we should do orphan
3047 : * cleanup when mounting. But this introduces backward
3048 : * compatibility issue.
3049 : */
3050 : if (!xchg(&root->orphan_item_inserted, 1))
3051 : insert = 2;
3052 : else
3053 : insert = 1;
3054 : #endif
3055 : insert = 1;
3056 9630 : atomic_inc(&root->orphan_inodes);
3057 : }
3058 :
3059 9630 : if (!test_and_set_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
3060 : &BTRFS_I(inode)->runtime_flags))
3061 : reserve = 1;
3062 : spin_unlock(&root->orphan_lock);
3063 :
3064 : /* grab metadata reservation from transaction handle */
3065 9630 : if (reserve) {
3066 9630 : ret = btrfs_orphan_reserve_metadata(trans, inode);
3067 9630 : BUG_ON(ret); /* -ENOSPC in reservation; Logic error? JDM */
3068 : }
3069 :
3070 : /* insert an orphan item to track this unlinked/truncated file */
3071 9630 : if (insert >= 1) {
3072 9630 : ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
3073 9630 : if (ret) {
3074 0 : atomic_dec(&root->orphan_inodes);
3075 0 : if (reserve) {
3076 : clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
3077 : &BTRFS_I(inode)->runtime_flags);
3078 0 : btrfs_orphan_release_metadata(inode);
3079 : }
3080 0 : if (ret != -EEXIST) {
3081 : clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
3082 : &BTRFS_I(inode)->runtime_flags);
3083 0 : btrfs_abort_transaction(trans, root, ret);
3084 0 : return ret;
3085 : }
3086 : }
3087 : ret = 0;
3088 : }
3089 :
3090 : /* insert an orphan item to record that the subvolume contains orphan files */
3091 9630 : if (insert >= 2) {
3092 0 : ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root,
3093 : root->root_key.objectid);
3094 0 : if (ret && ret != -EEXIST) {
3095 0 : btrfs_abort_transaction(trans, root, ret);
3096 0 : return ret;
3097 : }
3098 : }
3099 : return 0;
3100 : }
3101 :
3102 : /*
3103 : * We have done the truncate/delete so we can go ahead and remove the orphan
3104 : * item for this particular inode.
3105 : */
3106 9631 : static int btrfs_orphan_del(struct btrfs_trans_handle *trans,
3107 : struct inode *inode)
3108 : {
3109 9631 : struct btrfs_root *root = BTRFS_I(inode)->root;
3110 : int delete_item = 0;
3111 : int release_rsv = 0;
3112 : int ret = 0;
3113 :
3114 : spin_lock(&root->orphan_lock);
3115 9631 : if (test_and_clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
3116 9631 : &BTRFS_I(inode)->runtime_flags))
3117 : delete_item = 1;
3118 :
3119 9631 : if (test_and_clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
3120 : &BTRFS_I(inode)->runtime_flags))
3121 : release_rsv = 1;
3122 : spin_unlock(&root->orphan_lock);
3123 :
3124 9631 : if (delete_item) {
3125 9631 : atomic_dec(&root->orphan_inodes);
3126 9631 : if (trans)
3127 9631 : ret = btrfs_del_orphan_item(trans, root,
3128 : btrfs_ino(inode));
3129 : }
3130 :
3131 9631 : if (release_rsv)
3132 9630 : btrfs_orphan_release_metadata(inode);
3133 :
3134 9631 : return ret;
3135 : }
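: /*
:  * Orphan lifecycle in short: btrfs_orphan_add() inserts an orphan item
:  * keyed (BTRFS_ORPHAN_OBJECTID, ORPHAN_ITEM_KEY, inode number) while an
:  * unlink or truncate is in flight, btrfs_orphan_del() removes it once
:  * the operation has fully committed, and anything still present at
:  * mount time is replayed by btrfs_orphan_cleanup() below.
:  */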
3136 :
3137 : /*
3138 : * this cleans up any orphans that may be left on the list from the last use
3139 : * of this root.
3140 : */
3141 785 : int btrfs_orphan_cleanup(struct btrfs_root *root)
3142 : {
3143 : struct btrfs_path *path;
3144 : struct extent_buffer *leaf;
3145 : struct btrfs_key key, found_key;
3146 : struct btrfs_trans_handle *trans;
3147 : struct inode *inode;
3148 : u64 last_objectid = 0;
3149 : int ret = 0, nr_unlink = 0, nr_truncate = 0;
3150 :
3151 785 : if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED))
3152 : return 0;
3153 :
3154 778 : path = btrfs_alloc_path();
3155 778 : if (!path) {
3156 : ret = -ENOMEM;
3157 : goto out;
3158 : }
3159 778 : path->reada = -1;
3160 :
3161 778 : key.objectid = BTRFS_ORPHAN_OBJECTID;
3162 : btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY);
3163 778 : key.offset = (u64)-1;
3164 :
3165 : while (1) {
3166 851 : ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3167 851 : if (ret < 0)
3168 : goto out;
3169 :
3170 : /*
3171 : * ret == 0 means we found exactly what we were searching for, which
3172 : * is weird but possible; only adjust the path if we didn't find the
3173 : * key, and see if we have entries that match
3174 : */
3175 851 : if (ret > 0) {
3176 : ret = 0;
3177 794 : if (path->slots[0] == 0)
3178 : break;
3179 794 : path->slots[0]--;
3180 : }
3181 :
3182 : /* pull out the item */
3183 851 : leaf = path->nodes[0];
3184 851 : btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3185 :
3186 : /* make sure the item matches what we want */
3187 851 : if (found_key.objectid != BTRFS_ORPHAN_OBJECTID)
3188 : break;
3189 73 : if (btrfs_key_type(&found_key) != BTRFS_ORPHAN_ITEM_KEY)
3190 : break;
3191 :
3192 : /* release the path since we're done with it */
3193 73 : btrfs_release_path(path);
3194 :
3195 : /*
3196 : * This is where we basically do btrfs_lookup, without crossing
3197 : * roots. We store the inode number in the offset field of the
3198 : * orphan item.
3199 : */
3200 :
3201 73 : if (found_key.offset == last_objectid) {
3202 0 : btrfs_err(root->fs_info,
3203 : "Error removing orphan entry, stopping orphan cleanup");
3204 : ret = -EINVAL;
3205 0 : goto out;
3206 : }
3207 :
3208 : last_objectid = found_key.offset;
3209 :
3210 73 : found_key.objectid = found_key.offset;
3211 73 : found_key.type = BTRFS_INODE_ITEM_KEY;
3212 73 : found_key.offset = 0;
3213 73 : inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL);
3214 : ret = PTR_ERR_OR_ZERO(inode);
3215 73 : if (ret && ret != -ESTALE)
3216 : goto out;
3217 :
3218 73 : if (ret == -ESTALE && root == root->fs_info->tree_root) {
3219 : struct btrfs_root *dead_root;
3220 : struct btrfs_fs_info *fs_info = root->fs_info;
3221 : int is_dead_root = 0;
3222 :
3223 : /*
3224 : * this is an orphan in the tree root. Currently these
3225 : * could come from 2 sources:
3226 : * a) a snapshot deletion in progress
3227 : * b) a free space cache inode
3228 : * We need to distinguish those two, as the snapshot
3229 : * orphan must not get deleted.
3230 : * find_dead_roots already ran before us, so if this
3231 : * is a snapshot deletion, we should find the root
3232 : * in the dead_roots list
3233 : */
3234 : spin_lock(&fs_info->trans_lock);
3235 386 : list_for_each_entry(dead_root, &fs_info->dead_roots,
3236 : root_list) {
3237 772 : if (dead_root->root_key.objectid ==
3238 386 : found_key.objectid) {
3239 : is_dead_root = 1;
3240 : break;
3241 : }
3242 : }
3243 : spin_unlock(&fs_info->trans_lock);
3244 72 : if (is_dead_root) {
3245 : /* prevent this orphan from being found again */
3246 72 : key.offset = found_key.objectid - 1;
3247 72 : continue;
3248 : }
3249 : }
3250 : /*
3251 : * Inode is already gone but the orphan item is still there,
3252 : * kill the orphan item.
3253 : */
3254 1 : if (ret == -ESTALE) {
3255 0 : trans = btrfs_start_transaction(root, 1);
3256 0 : if (IS_ERR(trans)) {
3257 0 : ret = PTR_ERR(trans);
3258 0 : goto out;
3259 : }
3260 : btrfs_debug(root->fs_info, "auto deleting %Lu",
3261 : found_key.objectid);
3262 0 : ret = btrfs_del_orphan_item(trans, root,
3263 : found_key.objectid);
3264 0 : btrfs_end_transaction(trans, root);
3265 0 : if (ret)
3266 : goto out;
3267 0 : continue;
3268 : }
3269 :
3270 : /*
3271 : * add this inode to the orphan list so btrfs_orphan_del does
3272 : * the proper thing when we hit it
3273 : */
3274 : set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
3275 : &BTRFS_I(inode)->runtime_flags);
3276 1 : atomic_inc(&root->orphan_inodes);
3277 :
3278 : /* if we have links, this was a truncate, lets do that */
3279 1 : if (inode->i_nlink) {
3280 0 : if (WARN_ON(!S_ISREG(inode->i_mode))) {
3281 0 : iput(inode);
3282 0 : continue;
3283 : }
3284 : nr_truncate++;
3285 :
3286 : /* 1 for the orphan item deletion. */
3287 0 : trans = btrfs_start_transaction(root, 1);
3288 0 : if (IS_ERR(trans)) {
3289 0 : iput(inode);
3290 0 : ret = PTR_ERR(trans);
3291 0 : goto out;
3292 : }
3293 0 : ret = btrfs_orphan_add(trans, inode);
3294 0 : btrfs_end_transaction(trans, root);
3295 0 : if (ret) {
3296 0 : iput(inode);
3297 0 : goto out;
3298 : }
3299 :
3300 0 : ret = btrfs_truncate(inode);
3301 0 : if (ret)
3302 0 : btrfs_orphan_del(NULL, inode);
3303 : } else {
3304 : nr_unlink++;
3305 : }
3306 :
3307 : /* this will do delete_inode and everything for us */
3308 1 : iput(inode);
3309 1 : if (ret)
3310 : goto out;
3311 : }
3312 : /* release the path since we're done with it */
3313 778 : btrfs_release_path(path);
3314 :
3315 778 : root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE;
3316 :
3317 778 : if (root->orphan_block_rsv)
3318 0 : btrfs_block_rsv_release(root, root->orphan_block_rsv,
3319 : (u64)-1);
3320 :
3321 1556 : if (root->orphan_block_rsv ||
3322 : test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state)) {
3323 0 : trans = btrfs_join_transaction(root);
3324 0 : if (!IS_ERR(trans))
3325 0 : btrfs_end_transaction(trans, root);
3326 : }
3327 :
3328 : if (nr_unlink)
3329 : btrfs_debug(root->fs_info, "unlinked %d orphans", nr_unlink);
3330 : if (nr_truncate)
3331 : btrfs_debug(root->fs_info, "truncated %d orphans", nr_truncate);
3332 :
3333 : out:
3334 778 : if (ret)
3335 0 : btrfs_crit(root->fs_info,
3336 : "could not do orphan cleanup %d", ret);
3337 778 : btrfs_free_path(path);
3338 778 : return ret;
3339 : }
3340 :
3341 : /*
3342 : * very simple check to peek ahead in the leaf looking for xattrs. If we
3343 : * don't find any xattrs, we know there can't be any acls.
3344 : *
3345 : * slot is the slot the inode is in, objectid is the objectid of the inode
3346 : */
3347 4946 : static noinline int acls_after_inode_item(struct extent_buffer *leaf,
3348 : int slot, u64 objectid,
3349 : int *first_xattr_slot)
3350 : {
3351 : u32 nritems = btrfs_header_nritems(leaf);
3352 : struct btrfs_key found_key;
3353 : static u64 xattr_access = 0;
3354 : static u64 xattr_default = 0;
3355 : int scanned = 0;
3356 :
3357 4946 : if (!xattr_access) {
3358 0 : xattr_access = btrfs_name_hash(POSIX_ACL_XATTR_ACCESS,
3359 : strlen(POSIX_ACL_XATTR_ACCESS));
3360 0 : xattr_default = btrfs_name_hash(POSIX_ACL_XATTR_DEFAULT,
3361 : strlen(POSIX_ACL_XATTR_DEFAULT));
3362 : }
3363 :
3364 4946 : slot++;
3365 4946 : *first_xattr_slot = -1;
3366 10191 : while (slot < nritems) {
3367 4798 : btrfs_item_key_to_cpu(leaf, &found_key, slot);
3368 :
3369 : /* we found a different objectid, there must not be acls */
3370 4798 : if (found_key.objectid != objectid)
3371 : return 0;
3372 :
3373 : /* we found an xattr, assume we've got an acl */
3374 3939 : if (found_key.type == BTRFS_XATTR_ITEM_KEY) {
3375 183 : if (*first_xattr_slot == -1)
3376 182 : *first_xattr_slot = slot;
3377 366 : if (found_key.offset == xattr_access ||
3378 183 : found_key.offset == xattr_default)
3379 : return 1;
3380 : }
3381 :
3382 : /*
3383 : * we found a key greater than an xattr key, there can't
3384 : * be any acls later on
3385 : */
3386 3939 : if (found_key.type > BTRFS_XATTR_ITEM_KEY)
3387 : return 0;
3388 :
3389 299 : slot++;
3390 299 : scanned++;
3391 :
3392 : /*
3393 : * it goes inode, inode backrefs, xattrs, extents,
3394 : * so if there are a ton of hard links to an inode there can
3395 : * be a lot of backrefs. Don't waste time searching too hard,
3396 : * this is just an optimization
3397 : */
3398 299 : if (scanned >= 8)
3399 : break;
3400 : }
3401 : /* we hit the end of the leaf before we found an xattr or
3402 : * something larger than an xattr. We have to assume the inode
3403 : * has acls
3404 : */
3405 447 : if (*first_xattr_slot == -1)
3406 441 : *first_xattr_slot = slot;
3407 : return 1;
3408 : }
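: /*
:  * The scan above is a bounded peek-ahead: items for one inode are laid
:  * out in key order as inode item, backrefs, xattrs, extents, so seeing
:  * any key type past BTRFS_XATTR_ITEM_KEY proves there are no xattrs,
:  * while the scanned >= 8 cutoff just caps the cost for inodes with many
:  * hard-link backrefs.  Worked example (illustrative keys): a leaf with
:  *   (257 INODE_ITEM 0) (257 INODE_REF ...) (257 EXTENT_DATA 0)
:  * hits EXTENT_DATA > XATTR_ITEM_KEY on the second peeked slot and
:  * returns 0 without looking at the rest of the leaf.
:  */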
3409 :
3410 : /*
3411 : * read an inode from the btree into the in-memory inode
3412 : */
3413 5018 : static void btrfs_read_locked_inode(struct inode *inode)
3414 : {
3415 : struct btrfs_path *path;
3416 4758 : struct extent_buffer *leaf;
3417 : struct btrfs_inode_item *inode_item;
3418 : struct btrfs_timespec *tspec;
3419 5018 : struct btrfs_root *root = BTRFS_I(inode)->root;
3420 : struct btrfs_key location;
3421 : unsigned long ptr;
3422 : int maybe_acls;
3423 : u32 rdev;
3424 : int ret;
3425 : bool filled = false;
3426 : int first_xattr_slot;
3427 :
3428 5018 : ret = btrfs_fill_inode(inode, &rdev);
3429 5018 : if (!ret)
3430 : filled = true;
3431 :
3432 5018 : path = btrfs_alloc_path();
3433 5018 : if (!path)
3434 : goto make_bad;
3435 :
3436 5018 : memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
3437 :
3438 5018 : ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
3439 5018 : if (ret)
3440 : goto make_bad;
3441 :
3442 4946 : leaf = path->nodes[0];
3443 :
3444 4946 : if (filled)
3445 : goto cache_index;
3446 :
3447 9892 : inode_item = btrfs_item_ptr(leaf, path->slots[0],
3448 : struct btrfs_inode_item);
3449 4946 : inode->i_mode = btrfs_inode_mode(leaf, inode_item);
3450 4946 : set_nlink(inode, btrfs_inode_nlink(leaf, inode_item));
3451 : i_uid_write(inode, btrfs_inode_uid(leaf, inode_item));
3452 : i_gid_write(inode, btrfs_inode_gid(leaf, inode_item));
3453 : btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item));
3454 :
3455 : tspec = btrfs_inode_atime(inode_item);
3456 4946 : inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, tspec);
3457 4946 : inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
3458 :
3459 : tspec = btrfs_inode_mtime(inode_item);
3460 4946 : inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, tspec);
3461 4946 : inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
3462 :
3463 : tspec = btrfs_inode_ctime(inode_item);
3464 4946 : inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, tspec);
3465 4946 : inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
3466 :
3467 4946 : inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item));
3468 4946 : BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item);
3469 4946 : BTRFS_I(inode)->last_trans = btrfs_inode_transid(leaf, inode_item);
3470 :
3471 : /*
3472 : * If we were modified in the current generation and evicted from memory
3473 : * and then re-read we need to do a full sync since we don't have any
3474 : * idea about which extents were modified before we were evicted from
3475 : * cache.
3476 : */
3477 4946 : if (BTRFS_I(inode)->last_trans == root->fs_info->generation)
3478 : set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
3479 : &BTRFS_I(inode)->runtime_flags);
3480 :
3481 4946 : inode->i_version = btrfs_inode_sequence(leaf, inode_item);
3482 4946 : inode->i_generation = BTRFS_I(inode)->generation;
3483 4946 : inode->i_rdev = 0;
3484 4946 : rdev = btrfs_inode_rdev(leaf, inode_item);
3485 :
3486 4946 : BTRFS_I(inode)->index_cnt = (u64)-1;
3487 4946 : BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
3488 :
3489 : cache_index:
3490 4946 : path->slots[0]++;
3491 9704 : if (inode->i_nlink != 1 ||
3492 4758 : path->slots[0] >= btrfs_header_nritems(leaf))
3493 : goto cache_acl;
3494 :
3495 4677 : btrfs_item_key_to_cpu(leaf, &location, path->slots[0]);
3496 9354 : if (location.objectid != btrfs_ino(inode))
3497 : goto cache_acl;
3498 :
3499 8996 : ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
3500 4498 : if (location.type == BTRFS_INODE_REF_KEY) {
3501 : struct btrfs_inode_ref *ref;
3502 :
3503 4360 : ref = (struct btrfs_inode_ref *)ptr;
3504 4360 : BTRFS_I(inode)->dir_index = btrfs_inode_ref_index(leaf, ref);
3505 138 : } else if (location.type == BTRFS_INODE_EXTREF_KEY) {
3506 : struct btrfs_inode_extref *extref;
3507 :
3508 1 : extref = (struct btrfs_inode_extref *)ptr;
3509 1 : BTRFS_I(inode)->dir_index = btrfs_inode_extref_index(leaf,
3510 : extref);
3511 : }
3512 : cache_acl:
3513 : /*
3514 : * try to precache a NULL acl entry for files that don't have
3515 : * any xattrs or acls
3516 : */
3517 4946 : maybe_acls = acls_after_inode_item(leaf, path->slots[0],
3518 : btrfs_ino(inode), &first_xattr_slot);
3519 4946 : if (first_xattr_slot != -1) {
3520 623 : path->slots[0] = first_xattr_slot;
3521 623 : ret = btrfs_load_inode_props(inode, path);
3522 623 : if (ret)
3523 0 : btrfs_err(root->fs_info,
3524 : "error loading props for ino %llu (root %llu): %d",
3525 : btrfs_ino(inode),
3526 : root->root_key.objectid, ret);
3527 : }
3528 4946 : btrfs_free_path(path);
3529 :
3530 4946 : if (!maybe_acls)
3531 : cache_no_acl(inode);
3532 :
3533 4946 : switch (inode->i_mode & S_IFMT) {
3534 : case S_IFREG:
3535 3228 : inode->i_mapping->a_ops = &btrfs_aops;
3536 3228 : inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
3537 3228 : BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
3538 3228 : inode->i_fop = &btrfs_file_operations;
3539 3228 : inode->i_op = &btrfs_file_inode_operations;
3540 3228 : break;
3541 : case S_IFDIR:
3542 1213 : inode->i_fop = &btrfs_dir_file_operations;
3543 1213 : if (root == root->fs_info->tree_root)
3544 0 : inode->i_op = &btrfs_dir_ro_inode_operations;
3545 : else
3546 1213 : inode->i_op = &btrfs_dir_inode_operations;
3547 : break;
3548 : case S_IFLNK:
3549 267 : inode->i_op = &btrfs_symlink_inode_operations;
3550 267 : inode->i_mapping->a_ops = &btrfs_symlink_aops;
3551 267 : inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
3552 267 : break;
3553 : default:
3554 238 : inode->i_op = &btrfs_special_inode_operations;
3555 238 : init_special_inode(inode, inode->i_mode, rdev);
3556 238 : break;
3557 : }
3558 :
3559 4946 : btrfs_update_iflags(inode);
3560 9964 : return;
3561 :
3562 : make_bad:
3563 72 : btrfs_free_path(path);
3564 72 : make_bad_inode(inode);
3565 : }
3566 :
3567 : /*
3568 : * given a leaf and an inode, copy the inode fields into the leaf
3569 : */
3570 42409 : static void fill_inode_item(struct btrfs_trans_handle *trans,
3571 : struct extent_buffer *leaf,
3572 : struct btrfs_inode_item *item,
3573 : struct inode *inode)
3574 : {
3575 : struct btrfs_map_token token;
3576 :
3577 : btrfs_init_map_token(&token);
3578 :
3579 : btrfs_set_token_inode_uid(leaf, item, i_uid_read(inode), &token);
3580 : btrfs_set_token_inode_gid(leaf, item, i_gid_read(inode), &token);
3581 42410 : btrfs_set_token_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size,
3582 : &token);
3583 42410 : btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token);
3584 42410 : btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token);
3585 :
3586 42410 : btrfs_set_token_timespec_sec(leaf, btrfs_inode_atime(item),
3587 42410 : inode->i_atime.tv_sec, &token);
3588 42410 : btrfs_set_token_timespec_nsec(leaf, btrfs_inode_atime(item),
3589 42410 : inode->i_atime.tv_nsec, &token);
3590 :
3591 42410 : btrfs_set_token_timespec_sec(leaf, btrfs_inode_mtime(item),
3592 42410 : inode->i_mtime.tv_sec, &token);
3593 42410 : btrfs_set_token_timespec_nsec(leaf, btrfs_inode_mtime(item),
3594 42410 : inode->i_mtime.tv_nsec, &token);
3595 :
3596 42410 : btrfs_set_token_timespec_sec(leaf, btrfs_inode_ctime(item),
3597 42410 : inode->i_ctime.tv_sec, &token);
3598 42410 : btrfs_set_token_timespec_nsec(leaf, btrfs_inode_ctime(item),
3599 42410 : inode->i_ctime.tv_nsec, &token);
3600 :
3601 42410 : btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode),
3602 : &token);
3603 42410 : btrfs_set_token_inode_generation(leaf, item, BTRFS_I(inode)->generation,
3604 : &token);
3605 42410 : btrfs_set_token_inode_sequence(leaf, item, inode->i_version, &token);
3606 42410 : btrfs_set_token_inode_transid(leaf, item, trans->transid, &token);
3607 42410 : btrfs_set_token_inode_rdev(leaf, item, inode->i_rdev, &token);
3608 42410 : btrfs_set_token_inode_flags(leaf, item, BTRFS_I(inode)->flags, &token);
3609 : btrfs_set_token_inode_block_group(leaf, item, 0, &token);
3610 42410 : }
3611 :
3612 : /*
3613 : * copy everything in the in-memory inode directly into the btree.
3614 : */
3615 43876 : static noinline int btrfs_update_inode_item(struct btrfs_trans_handle *trans,
3616 : struct btrfs_root *root, struct inode *inode)
3617 : {
3618 : struct btrfs_inode_item *inode_item;
3619 : struct btrfs_path *path;
3620 : struct extent_buffer *leaf;
3621 : int ret;
3622 :
3623 21938 : path = btrfs_alloc_path();
3624 21938 : if (!path)
3625 : return -ENOMEM;
3626 :
3627 21938 : path->leave_spinning = 1;
3628 21938 : ret = btrfs_lookup_inode(trans, root, path, &BTRFS_I(inode)->location,
3629 : 1);
3630 21938 : if (ret) {
3631 0 : if (ret > 0)
3632 : ret = -ENOENT;
3633 : goto failed;
3634 : }
3635 :
3636 21938 : leaf = path->nodes[0];
3637 43876 : inode_item = btrfs_item_ptr(leaf, path->slots[0],
3638 : struct btrfs_inode_item);
3639 :
3640 21938 : fill_inode_item(trans, leaf, inode_item, inode);
3641 21938 : btrfs_mark_buffer_dirty(leaf);
3642 : btrfs_set_inode_last_trans(trans, inode);
3643 : ret = 0;
3644 : failed:
3645 21938 : btrfs_free_path(path);
3646 21937 : return ret;
3647 : }
3648 :
3649 : /*
3650 : * copy everything in the in-memory inode into the btree.
3651 : */
3652 322344 : noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
3653 : struct btrfs_root *root, struct inode *inode)
3654 : {
3655 : int ret;
3656 :
3657 : /*
3658 : * If the inode is a free space inode, we can deadlock during commit
3659 : * if we put it into the delayed code.
3660 : *
3661 : * The data relocation inode should also be directly updated
3662 : * without delay
3663 : */
3664 172140 : if (!btrfs_is_free_space_inode(inode)
3665 152244 : && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) {
3666 150202 : btrfs_update_root_times(trans, root);
3667 :
3668 150203 : ret = btrfs_delayed_update_inode(trans, root, inode);
3669 150204 : if (!ret)
3670 : btrfs_set_inode_last_trans(trans, inode);
3671 150204 : return ret;
3672 : }
3673 :
3674 21938 : return btrfs_update_inode_item(trans, root, inode);
3675 : }
3676 :
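      : /*
      : * As btrfs_update_inode(), but fall back to updating the inode item
      : * in the btree directly if the delayed update fails with -ENOSPC.
      : */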
3677 51685 : noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
3678 : struct btrfs_root *root,
3679 : struct inode *inode)
3680 : {
3681 : int ret;
3682 :
3683 51685 : ret = btrfs_update_inode(trans, root, inode);
3684 51684 : if (ret == -ENOSPC)
3685 0 : return btrfs_update_inode_item(trans, root, inode);
3686 : return ret;
3687 : }
3688 :
3689 : /*
3690 : * unlink helper that gets used here in inode.c and in the tree logging
3691 : * recovery code. It removes a link in a directory with a given name, and
3692 : * also drops the inode's back refs to the directory
3693 : */
3694 12289 : static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
3695 : struct btrfs_root *root,
3696 : struct inode *dir, struct inode *inode,
3697 : const char *name, int name_len)
3698 : {
3699 : struct btrfs_path *path;
3700 : int ret = 0;
3701 : struct extent_buffer *leaf;
3702 : struct btrfs_dir_item *di;
3703 : struct btrfs_key key;
3704 : u64 index;
3705 : u64 ino = btrfs_ino(inode);
3706 : u64 dir_ino = btrfs_ino(dir);
3707 :
3708 12289 : path = btrfs_alloc_path();
3709 12289 : if (!path) {
3710 : ret = -ENOMEM;
3711 : goto out;
3712 : }
3713 :
3714 12289 : path->leave_spinning = 1;
3715 12289 : di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
3716 : name, name_len, -1);
3717 12289 : if (IS_ERR(di)) {
3718 0 : ret = PTR_ERR(di);
3719 0 : goto err;
3720 : }
3721 12289 : if (!di) {
3722 : ret = -ENOENT;
3723 : goto err;
3724 : }
3725 12289 : leaf = path->nodes[0];
3726 12289 : btrfs_dir_item_key_to_cpu(leaf, di, &key);
3727 12289 : ret = btrfs_delete_one_dir_name(trans, root, path, di);
3728 12289 : if (ret)
3729 : goto err;
3730 12289 : btrfs_release_path(path);
3731 :
3732 : /*
3733 : * If we don't have the dir index, we have to get it by looking up
3734 : * the inode ref; and since that lookup gives us the inode ref, we
3735 : * remove it directly, so there is no need for delayed deletion.
3736 : *
3737 : * But if we do have the dir index, there is no need to search for
3738 : * the inode ref to get it. Since the inode ref is close to the
3739 : * inode item, it is better to delay its deletion and do it when we
3740 : * update the inode item.
3741 : */
3742 12289 : if (BTRFS_I(inode)->dir_index) {
3743 6256 : ret = btrfs_delayed_delete_inode_ref(inode);
3744 6256 : if (!ret) {
3745 6256 : index = BTRFS_I(inode)->dir_index;
3746 6256 : goto skip_backref;
3747 : }
3748 : }
3749 :
3750 6033 : ret = btrfs_del_inode_ref(trans, root, name, name_len, ino,
3751 : dir_ino, &index);
3752 6033 : if (ret) {
3753 0 : btrfs_info(root->fs_info,
3754 : "failed to delete reference to %.*s, inode %llu parent %llu",
3755 : name_len, name, ino, dir_ino);
3756 0 : btrfs_abort_transaction(trans, root, ret);
3757 0 : goto err;
3758 : }
3759 : skip_backref:
3760 12289 : ret = btrfs_delete_delayed_dir_index(trans, root, dir, index);
3761 12289 : if (ret) {
3762 0 : btrfs_abort_transaction(trans, root, ret);
3763 0 : goto err;
3764 : }
3765 :
3766 12289 : ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len,
3767 : inode, dir_ino);
3768 12289 : if (ret != 0 && ret != -ENOENT) {
3769 0 : btrfs_abort_transaction(trans, root, ret);
3770 0 : goto err;
3771 : }
3772 :
3773 12289 : ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len,
3774 : dir, index);
3775 12289 : if (ret == -ENOENT)
3776 : ret = 0;
3777 12138 : else if (ret)
3778 0 : btrfs_abort_transaction(trans, root, ret);
3779 : err:
3780 12289 : btrfs_free_path(path);
3781 12289 : if (ret)
3782 : goto out;
3783 :
3784 12289 : btrfs_i_size_write(dir, dir->i_size - name_len * 2);
3785 : inode_inc_iversion(inode);
3786 : inode_inc_iversion(dir);
3787 12289 : inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME;
3788 12289 : ret = btrfs_update_inode(trans, root, dir);
3789 : out:
3790 12289 : return ret;
3791 : }
3792 :
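      : /*
      : * remove a directory entry and drop the link count on the inode,
      : * updating the inode item if the unlink succeeds
      : */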
3793 9978 : int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
3794 : struct btrfs_root *root,
3795 : struct inode *dir, struct inode *inode,
3796 : const char *name, int name_len)
3797 : {
3798 : int ret;
3799 9978 : ret = __btrfs_unlink_inode(trans, root, dir, inode, name, name_len);
3800 9978 : if (!ret) {
3801 9978 : drop_nlink(inode);
3802 9978 : ret = btrfs_update_inode(trans, root, inode);
3803 : }
3804 9978 : return ret;
3805 : }
3806 :
3807 : /*
3808 : * helper to start transaction for unlink and rmdir.
3809 : *
3810 : * unlink and rmdir are special in btrfs: they do not always free space, so
3811 : * if we cannot make our reservations the normal way, try to see if there is
3812 : * plenty of slack room in the global reserve to migrate, otherwise we cannot
3813 : * allow the unlink to occur.
3814 : */
3815 9966 : static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir)
3816 : {
3817 : struct btrfs_trans_handle *trans;
3818 9966 : struct btrfs_root *root = BTRFS_I(dir)->root;
3819 : int ret;
3820 :
3821 : /*
3822 : * 1 for the possible orphan item
3823 : * 1 for the dir item
3824 : * 1 for the dir index
3825 : * 1 for the inode ref
3826 : * 1 for the inode
3827 : */
3828 9966 : trans = btrfs_start_transaction(root, 5);
3829 9966 : if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
3830 : return trans;
3831 :
3832 0 : if (PTR_ERR(trans) == -ENOSPC) {
3833 : u64 num_bytes = btrfs_calc_trans_metadata_size(root, 5);
3834 :
3835 0 : trans = btrfs_start_transaction(root, 0);
3836 0 : if (IS_ERR(trans))
3837 : return trans;
3838 0 : ret = btrfs_cond_migrate_bytes(root->fs_info,
3839 0 : &root->fs_info->trans_block_rsv,
3840 : num_bytes, 5);
3841 0 : if (ret) {
3842 0 : btrfs_end_transaction(trans, root);
3843 0 : return ERR_PTR(ret);
3844 : }
3845 0 : trans->block_rsv = &root->fs_info->trans_block_rsv;
3846 0 : trans->bytes_reserved = num_bytes;
3847 : }
3848 0 : return trans;
3849 : }
3850 :
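      : /*
      : * the ->unlink callback: remove the directory entry and, if the link
      : * count hit zero, add an orphan item so the inode still gets cleaned
      : * up if we crash before eviction finishes the delete
      : */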
3851 8623 : static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
3852 : {
3853 8623 : struct btrfs_root *root = BTRFS_I(dir)->root;
3854 : struct btrfs_trans_handle *trans;
3855 8623 : struct inode *inode = dentry->d_inode;
3856 : int ret;
3857 :
3858 8623 : trans = __unlink_start_trans(dir);
3859 8623 : if (IS_ERR(trans))
3860 0 : return PTR_ERR(trans);
3861 :
3862 8623 : btrfs_record_unlink_dir(trans, dir, dentry->d_inode, 0);
3863 :
3864 17246 : ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
3865 17246 : dentry->d_name.name, dentry->d_name.len);
3866 8623 : if (ret)
3867 : goto out;
3868 :
3869 8623 : if (inode->i_nlink == 0) {
3870 5438 : ret = btrfs_orphan_add(trans, inode);
3871 : if (ret)
3872 : goto out;
3873 : }
3874 :
3875 : out:
3876 8623 : btrfs_end_transaction(trans, root);
3877 8623 : btrfs_btree_balance_dirty(root);
3878 8623 : return ret;
3879 : }
3880 :
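      : /*
      : * remove a subvolume entry from a directory: delete the dir item,
      : * the root ref and the dir index item, then update the parent
      : * directory's size and timestamps
      : */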
3881 34 : int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
3882 : struct btrfs_root *root,
3883 : struct inode *dir, u64 objectid,
3884 : const char *name, int name_len)
3885 : {
3886 : struct btrfs_path *path;
3887 : struct extent_buffer *leaf;
3888 : struct btrfs_dir_item *di;
3889 : struct btrfs_key key;
3890 : u64 index;
3891 : int ret;
3892 : u64 dir_ino = btrfs_ino(dir);
3893 :
3894 34 : path = btrfs_alloc_path();
3895 34 : if (!path)
3896 : return -ENOMEM;
3897 :
3898 34 : di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
3899 : name, name_len, -1);
3900 34 : if (IS_ERR_OR_NULL(di)) {
3901 0 : if (!di)
3902 : ret = -ENOENT;
3903 : else
3904 0 : ret = PTR_ERR(di);
3905 : goto out;
3906 : }
3907 :
3908 34 : leaf = path->nodes[0];
3909 34 : btrfs_dir_item_key_to_cpu(leaf, di, &key);
3910 34 : WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid);
3911 34 : ret = btrfs_delete_one_dir_name(trans, root, path, di);
3912 34 : if (ret) {
3913 0 : btrfs_abort_transaction(trans, root, ret);
3914 0 : goto out;
3915 : }
3916 34 : btrfs_release_path(path);
3917 :
3918 34 : ret = btrfs_del_root_ref(trans, root->fs_info->tree_root,
3919 : objectid, root->root_key.objectid,
3920 : dir_ino, &index, name, name_len);
3921 34 : if (ret < 0) {
3922 0 : if (ret != -ENOENT) {
3923 0 : btrfs_abort_transaction(trans, root, ret);
3924 0 : goto out;
3925 : }
3926 0 : di = btrfs_search_dir_index_item(root, path, dir_ino,
3927 : name, name_len);
3928 0 : if (IS_ERR_OR_NULL(di)) {
3929 0 : if (!di)
3930 : ret = -ENOENT;
3931 : else
3932 0 : ret = PTR_ERR(di);
3933 0 : btrfs_abort_transaction(trans, root, ret);
3934 0 : goto out;
3935 : }
3936 :
3937 0 : leaf = path->nodes[0];
3938 0 : btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3939 0 : btrfs_release_path(path);
3940 0 : index = key.offset;
3941 : }
3942 34 : btrfs_release_path(path);
3943 :
3944 34 : ret = btrfs_delete_delayed_dir_index(trans, root, dir, index);
3945 34 : if (ret) {
3946 0 : btrfs_abort_transaction(trans, root, ret);
3947 0 : goto out;
3948 : }
3949 :
3950 34 : btrfs_i_size_write(dir, dir->i_size - name_len * 2);
3951 : inode_inc_iversion(dir);
3952 34 : dir->i_mtime = dir->i_ctime = CURRENT_TIME;
3953 34 : ret = btrfs_update_inode_fallback(trans, root, dir);
3954 34 : if (ret)
3955 0 : btrfs_abort_transaction(trans, root, ret);
3956 : out:
3957 34 : btrfs_free_path(path);
3958 34 : return ret;
3959 : }
3960 :
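      : /*
      : * the ->rmdir callback: only empty directories can be removed, a
      : * subvolume root itself cannot be rmdir'd, and empty-subvolume
      : * placeholder directories are handled via btrfs_unlink_subvol()
      : */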
3961 1911 : static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
3962 : {
3963 1911 : struct inode *inode = dentry->d_inode;
3964 : int err = 0;
3965 1911 : struct btrfs_root *root = BTRFS_I(dir)->root;
3966 : struct btrfs_trans_handle *trans;
3967 :
3968 1911 : if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
3969 : return -ENOTEMPTY;
3970 1344 : if (btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID)
3971 : return -EPERM;
3972 :
3973 1343 : trans = __unlink_start_trans(dir);
3974 1343 : if (IS_ERR(trans))
3975 0 : return PTR_ERR(trans);
3976 :
3977 1343 : if (unlikely(btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
3978 0 : err = btrfs_unlink_subvol(trans, root, dir,
3979 : BTRFS_I(inode)->location.objectid,
3980 0 : dentry->d_name.name,
3981 0 : dentry->d_name.len);
3982 0 : goto out;
3983 : }
3984 :
3985 1343 : err = btrfs_orphan_add(trans, inode);
3986 1343 : if (err)
3987 : goto out;
3988 :
3989 : /* now the directory is empty */
3990 2686 : err = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
3991 2686 : dentry->d_name.name, dentry->d_name.len);
3992 1343 : if (!err)
3993 : btrfs_i_size_write(inode, 0);
3994 : out:
3995 1343 : btrfs_end_transaction(trans, root);
3996 1343 : btrfs_btree_balance_dirty(root);
3997 :
3998 1343 : return err;
3999 : }
4000 :
4001 : /*
4002 : * this can truncate away extent items, csum items and directory items.
4003 : * It starts at a high offset and removes keys until it can't find
4004 : * any higher than new_size
4005 : *
4006 : * csum items that cross the new i_size are truncated to the new size
4007 : * as well.
4008 : *
4009 : * min_type is the minimum key type to truncate down to. If set to 0, this
4010 : * will kill all the items on this inode, including the INODE_ITEM_KEY.
4011 : */
4012 13759 : int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
4013 : struct btrfs_root *root,
4014 : struct inode *inode,
4015 : u64 new_size, u32 min_type)
4016 : {
4017 : struct btrfs_path *path;
4018 12914 : struct extent_buffer *leaf;
4019 : struct btrfs_file_extent_item *fi;
4020 : struct btrfs_key key;
4021 : struct btrfs_key found_key;
4022 : u64 extent_start = 0;
4023 : u64 extent_num_bytes = 0;
4024 : u64 extent_offset = 0;
4025 : u64 item_end = 0;
4026 : u64 last_size = (u64)-1;
4027 : u32 found_type = (u8)-1;
4028 : int found_extent;
4029 : int del_item;
4030 : int pending_del_nr = 0;
4031 : int pending_del_slot = 0;
4032 : int extent_type = -1;
4033 : int ret;
4034 : int err = 0;
4035 : u64 ino = btrfs_ino(inode);
4036 :
4037 13759 : BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
4038 :
4039 13759 : path = btrfs_alloc_path();
4040 13759 : if (!path)
4041 : return -ENOMEM;
4042 13759 : path->reada = -1;
4043 :
4044 : /*
4045 : * We want to drop from the next block forward in case this new size is
4046 : * not block aligned since we will be keeping the last block of the
4047 : * extent just the way it is.
4048 : */
4049 17929 : if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
4050 4170 : root == root->fs_info->tree_root)
4051 13431 : btrfs_drop_extent_cache(inode, ALIGN(new_size,
4052 : root->sectorsize), (u64)-1, 0);
4053 :
4054 : /*
4055 : * This function is also used to drop the items in the log tree before
4056 : * we relog the inode, so if root != BTRFS_I(inode)->root, it means
4057 : * it is used to drop the logged items. So we shouldn't kill the delayed
4058 : * items.
4059 : */
4060 13759 : if (min_type == 0 && root == BTRFS_I(inode)->root)
4061 6907 : btrfs_kill_delayed_inode_items(inode);
4062 :
4063 13759 : key.objectid = ino;
4064 13759 : key.offset = (u64)-1;
4065 13759 : key.type = (u8)-1;
4066 :
4067 : search_again:
4068 14406 : path->leave_spinning = 1;
4069 14406 : ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
4070 14406 : if (ret < 0) {
4071 : err = ret;
4072 : goto out;
4073 : }
4074 :
4075 14406 : if (ret > 0) {
4076 : /* there are no items in the tree for us to truncate, we're
4077 : * done
4078 : */
4079 14406 : if (path->slots[0] == 0)
4080 : goto out;
4081 14189 : path->slots[0]--;
4082 : }
4083 :
4084 : while (1) {
4085 : fi = NULL;
4086 38229 : leaf = path->nodes[0];
4087 38229 : btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
4088 38229 : found_type = btrfs_key_type(&found_key);
4089 :
4090 38229 : if (found_key.objectid != ino)
4091 : break;
4092 :
4093 38123 : if (found_type < min_type)
4094 : break;
4095 :
4096 33833 : item_end = found_key.offset;
4097 33833 : if (found_type == BTRFS_EXTENT_DATA_KEY) {
4098 41164 : fi = btrfs_item_ptr(leaf, path->slots[0],
4099 : struct btrfs_file_extent_item);
4100 20582 : extent_type = btrfs_file_extent_type(leaf, fi);
4101 20582 : if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
4102 19228 : item_end +=
4103 : btrfs_file_extent_num_bytes(leaf, fi);
4104 1354 : } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4105 1354 : item_end += btrfs_file_extent_inline_len(leaf,
4106 : path->slots[0], fi);
4107 : }
4108 20582 : item_end--;
4109 : }
4110 33833 : if (found_type > min_type) {
4111 : del_item = 1;
4112 : } else {
4113 9815 : if (item_end < new_size)
4114 : break;
4115 8177 : if (found_key.offset >= new_size)
4116 : del_item = 1;
4117 : else
4118 : del_item = 0;
4119 : }
4120 : found_extent = 0;
4121 : /* FIXME, shrink the extent if the ref count is only 1 */
4122 32195 : if (found_type != BTRFS_EXTENT_DATA_KEY)
4123 : goto delete;
4124 :
4125 18944 : if (del_item)
4126 18348 : last_size = found_key.offset;
4127 : else
4128 : last_size = new_size;
4129 :
4130 18944 : if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
4131 : u64 num_dec;
4132 : extent_start = btrfs_file_extent_disk_bytenr(leaf, fi);
4133 17608 : if (!del_item) {
4134 : u64 orig_num_bytes =
4135 : btrfs_file_extent_num_bytes(leaf, fi);
4136 596 : extent_num_bytes = ALIGN(new_size -
4137 : found_key.offset,
4138 : root->sectorsize);
4139 : btrfs_set_file_extent_num_bytes(leaf, fi,
4140 : extent_num_bytes);
4141 596 : num_dec = (orig_num_bytes -
4142 : extent_num_bytes);
4143 596 : if (test_bit(BTRFS_ROOT_REF_COWS,
4144 596 : &root->state) &&
4145 : extent_start != 0)
4146 331 : inode_sub_bytes(inode, num_dec);
4147 596 : btrfs_mark_buffer_dirty(leaf);
4148 : } else {
4149 : extent_num_bytes =
4150 : btrfs_file_extent_disk_num_bytes(leaf,
4151 : fi);
4152 34024 : extent_offset = found_key.offset -
4153 : btrfs_file_extent_offset(leaf, fi);
4154 :
4155 : /* FIXME blocksize != 4096 */
4156 : num_dec = btrfs_file_extent_num_bytes(leaf, fi);
4157 17012 : if (extent_start != 0) {
4158 : found_extent = 1;
4159 12923 : if (test_bit(BTRFS_ROOT_REF_COWS,
4160 : &root->state))
4161 9072 : inode_sub_bytes(inode, num_dec);
4162 : }
4163 : }
4164 1336 : } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4165 : /*
4166 : * we can't truncate inline items that have had
4167 : * special encodings
4168 : */
4169 1336 : if (!del_item &&
4170 0 : btrfs_file_extent_compression(leaf, fi) == 0 &&
4171 0 : btrfs_file_extent_encryption(leaf, fi) == 0 &&
4172 0 : btrfs_file_extent_other_encoding(leaf, fi) == 0) {
4173 0 : u32 size = new_size - found_key.offset;
4174 :
4175 0 : if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
4176 0 : inode_sub_bytes(inode, item_end + 1 -
4177 : new_size);
4178 :
4179 : /*
4180 : * update the ram bytes to properly reflect
4181 : * the new size of our item
4182 : */
4183 0 : btrfs_set_file_extent_ram_bytes(leaf, fi, size);
4184 : size =
4185 : btrfs_file_extent_calc_inline_size(size);
4186 0 : btrfs_truncate_item(root, path, size, 1);
4187 1336 : } else if (test_bit(BTRFS_ROOT_REF_COWS,
4188 : &root->state)) {
4189 1336 : inode_sub_bytes(inode, item_end + 1 -
4190 1336 : found_key.offset);
4191 : }
4192 : }
4193 : delete:
4194 32195 : if (del_item) {
4195 31599 : if (!pending_del_nr) {
4196 : /* no pending yet, add ourselves */
4197 11847 : pending_del_slot = path->slots[0];
4198 : pending_del_nr = 1;
4199 39504 : } else if (pending_del_nr &&
4200 19752 : path->slots[0] + 1 == pending_del_slot) {
4201 : /* hop on the pending chunk */
4202 19752 : pending_del_nr++;
4203 19752 : pending_del_slot = path->slots[0];
4204 : } else {
4205 0 : BUG();
4206 : }
4207 : } else {
4208 : break;
4209 : }
4210 44522 : if (found_extent &&
4211 3851 : (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
4212 3851 : root == root->fs_info->tree_root)) {
4213 12914 : btrfs_set_path_blocking(path);
4214 12914 : ret = btrfs_free_extent(trans, root, extent_start,
4215 : extent_num_bytes, 0,
4216 : btrfs_header_owner(leaf),
4217 : ino, extent_offset, 0);
4218 12914 : BUG_ON(ret);
4219 : }
4220 :
4221 31599 : if (found_type == BTRFS_INODE_ITEM_KEY)
4222 : break;
4223 :
4224 24687 : if (path->slots[0] == 0 ||
4225 : path->slots[0] != pending_del_slot) {
4226 647 : if (pending_del_nr) {
4227 647 : ret = btrfs_del_items(trans, root, path,
4228 : pending_del_slot,
4229 : pending_del_nr);
4230 647 : if (ret) {
4231 0 : btrfs_abort_transaction(trans,
4232 : root, ret);
4233 0 : goto error;
4234 : }
4235 : pending_del_nr = 0;
4236 : }
4237 647 : btrfs_release_path(path);
4238 647 : goto search_again;
4239 : } else {
4240 24040 : path->slots[0]--;
4241 : }
4242 24040 : }
4243 : out:
4244 13759 : if (pending_del_nr) {
4245 11200 : ret = btrfs_del_items(trans, root, path, pending_del_slot,
4246 : pending_del_nr);
4247 11200 : if (ret)
4248 0 : btrfs_abort_transaction(trans, root, ret);
4249 : }
4250 : error:
4251 21563 : if (last_size != (u64)-1 &&
4252 7804 : root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
4253 7801 : btrfs_ordered_update_i_size(inode, last_size, NULL);
4254 13759 : btrfs_free_path(path);
4255 13759 : return err;
4256 : }
4257 :
4258 : /*
4259 : * btrfs_truncate_page - read, zero a chunk and write a page
4260 : * @inode - inode that we're zeroing
4261 : * @from - the offset to start zeroing
4262 : * @len - the length to zero, 0 to zero the entire range relative to the
4263 : * offset
4264 : * @front - zero up to the offset instead of from the offset on
4265 : *
4266 : * This will find the page for the "from" offset, cow the page and zero the
4267 : * part we want to zero. This is used with truncate and hole punching.
4268 : */
4269 6432 : int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len,
4270 : int front)
4271 : {
4272 6432 : struct address_space *mapping = inode->i_mapping;
4273 6432 : struct btrfs_root *root = BTRFS_I(inode)->root;
4274 6432 : struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
4275 : struct btrfs_ordered_extent *ordered;
4276 6432 : struct extent_state *cached_state = NULL;
4277 : char *kaddr;
4278 6432 : u32 blocksize = root->sectorsize;
4279 6432 : pgoff_t index = from >> PAGE_CACHE_SHIFT;
4280 6432 : unsigned offset = from & (PAGE_CACHE_SIZE-1);
4281 2356 : struct page *page;
4282 : gfp_t mask = btrfs_alloc_write_mask(mapping);
4283 : int ret = 0;
4284 : u64 page_start;
4285 : u64 page_end;
4286 :
4287 6432 : if ((offset & (blocksize - 1)) == 0 &&
4288 0 : (!len || ((len & (blocksize - 1)) == 0)))
4289 : goto out;
4290 2351 : ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
4291 2351 : if (ret)
4292 : goto out;
4293 :
4294 : again:
4295 : page = find_or_create_page(mapping, index, mask);
4296 2356 : if (!page) {
4297 0 : btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
4298 : ret = -ENOMEM;
4299 0 : goto out;
4300 : }
4301 :
4302 2356 : page_start = page_offset(page);
4303 2356 : page_end = page_start + PAGE_CACHE_SIZE - 1;
4304 :
4305 2356 : if (!PageUptodate(page)) {
4306 : ret = btrfs_readpage(NULL, page);
4307 887 : lock_page(page);
4308 887 : if (page->mapping != mapping) {
4309 0 : unlock_page(page);
4310 0 : page_cache_release(page);
4311 0 : goto again;
4312 : }
4313 887 : if (!PageUptodate(page)) {
4314 : ret = -EIO;
4315 : goto out_unlock;
4316 : }
4317 : }
4318 2356 : wait_on_page_writeback(page);
4319 :
4320 2356 : lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state);
4321 2356 : set_page_extent_mapped(page);
4322 :
4323 2356 : ordered = btrfs_lookup_ordered_extent(inode, page_start);
4324 2356 : if (ordered) {
4325 5 : unlock_extent_cached(io_tree, page_start, page_end,
4326 : &cached_state, GFP_NOFS);
4327 5 : unlock_page(page);
4328 5 : page_cache_release(page);
4329 5 : btrfs_start_ordered_extent(inode, ordered, 1);
4330 5 : btrfs_put_ordered_extent(ordered);
4331 5 : goto again;
4332 : }
4333 :
4334 2351 : clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
4335 : EXTENT_DIRTY | EXTENT_DELALLOC |
4336 : EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
4337 : 0, 0, &cached_state, GFP_NOFS);
4338 :
4339 2351 : ret = btrfs_set_extent_delalloc(inode, page_start, page_end,
4340 : &cached_state);
4341 2351 : if (ret) {
4342 0 : unlock_extent_cached(io_tree, page_start, page_end,
4343 : &cached_state, GFP_NOFS);
4344 0 : goto out_unlock;
4345 : }
4346 :
4347 : if (offset != PAGE_CACHE_SIZE) {
4348 2351 : if (!len)
4349 2351 : len = PAGE_CACHE_SIZE - offset;
4350 : kaddr = kmap(page);
4351 2351 : if (front)
4352 18 : memset(kaddr, 0, offset);
4353 : else
4354 2333 : memset(kaddr + offset, 0, len);
4355 : flush_dcache_page(page);
4356 : kunmap(page);
4357 : }
4358 : ClearPageChecked(page);
4359 2351 : set_page_dirty(page);
4360 2351 : unlock_extent_cached(io_tree, page_start, page_end, &cached_state,
4361 : GFP_NOFS);
4362 :
4363 : out_unlock:
4364 2351 : if (ret)
4365 0 : btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
4366 2351 : unlock_page(page);
4367 2351 : page_cache_release(page);
4368 : out:
4369 6432 : return ret;
4370 : }
4371 :
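      : /*
      : * insert a file extent item describing a hole at [offset, offset + len).
      : * With the NO_HOLES incompat feature there is nothing to insert, so just
      : * mark the inode as changed in the current transaction.
      : */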
4372 5031 : static int maybe_insert_hole(struct btrfs_root *root, struct inode *inode,
4373 : u64 offset, u64 len)
4374 : {
4375 : struct btrfs_trans_handle *trans;
4376 : int ret;
4377 :
4378 : /*
4379 : * Still need to make sure the inode looks like it's been updated so
4380 : * that any holes get logged if we fsync.
4381 : */
4382 10062 : if (btrfs_fs_incompat(root->fs_info, NO_HOLES)) {
4383 3 : BTRFS_I(inode)->last_trans = root->fs_info->generation;
4384 3 : BTRFS_I(inode)->last_sub_trans = root->log_transid;
4385 3 : BTRFS_I(inode)->last_log_commit = root->last_log_commit;
4386 3 : return 0;
4387 : }
4388 :
4389 : /*
4390 : * 1 - for the one we're dropping
4391 : * 1 - for the one we're adding
4392 : * 1 - for updating the inode.
4393 : */
4394 5028 : trans = btrfs_start_transaction(root, 3);
4395 5028 : if (IS_ERR(trans))
4396 0 : return PTR_ERR(trans);
4397 :
4398 5028 : ret = btrfs_drop_extents(trans, root, inode, offset, offset + len, 1);
4399 5027 : if (ret) {
4400 0 : btrfs_abort_transaction(trans, root, ret);
4401 0 : btrfs_end_transaction(trans, root);
4402 0 : return ret;
4403 : }
4404 :
4405 5027 : ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), offset,
4406 : 0, 0, len, 0, len, 0, 0, 0);
4407 5028 : if (ret)
4408 0 : btrfs_abort_transaction(trans, root, ret);
4409 : else
4410 5028 : btrfs_update_inode(trans, root, inode);
4411 5028 : btrfs_end_transaction(trans, root);
4412 5028 : return ret;
4413 : }
4414 :
4415 : /*
4416 : * This function puts in dummy file extents for the area we're creating a hole
4417 : * for. So if we are truncating this file to a larger size we need to insert
4418 : * these file extents so that btrfs_get_extent will return an EXTENT_MAP_HOLE
4419 : * for the range between oldsize and size.
4420 : */
4421 4955 : int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
4422 : {
4423 4955 : struct btrfs_root *root = BTRFS_I(inode)->root;
4424 4955 : struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
4425 5508 : struct extent_map *em = NULL;
4426 4955 : struct extent_state *cached_state = NULL;
4427 4955 : struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
4428 4955 : u64 hole_start = ALIGN(oldsize, root->sectorsize);
4429 4955 : u64 block_end = ALIGN(size, root->sectorsize);
4430 : u64 last_byte;
4431 : u64 cur_offset;
4432 : u64 hole_size;
4433 : int err = 0;
4434 :
4435 : /*
4436 : * If our size started in the middle of a page we need to zero out the
4437 : * rest of the page before we expand the i_size, otherwise we could
4438 : * expose stale data.
4439 : */
4440 4955 : err = btrfs_truncate_page(inode, oldsize, 0, 0);
4441 4955 : if (err)
4442 : return err;
4443 :
4444 4955 : if (size <= hole_start)
4445 : return 0;
4446 :
4447 : while (1) {
4448 : struct btrfs_ordered_extent *ordered;
4449 :
4450 4954 : lock_extent_bits(io_tree, hole_start, block_end - 1, 0,
4451 : &cached_state);
4452 4954 : ordered = btrfs_lookup_ordered_range(inode, hole_start,
4453 : block_end - hole_start);
4454 4954 : if (!ordered)
4455 : break;
4456 0 : unlock_extent_cached(io_tree, hole_start, block_end - 1,
4457 : &cached_state, GFP_NOFS);
4458 0 : btrfs_start_ordered_extent(inode, ordered, 1);
4459 0 : btrfs_put_ordered_extent(ordered);
4460 0 : }
4461 :
4462 : cur_offset = hole_start;
4463 : while (1) {
4464 5508 : em = btrfs_get_extent(inode, NULL, 0, cur_offset,
4465 : block_end - cur_offset, 0);
4466 5508 : if (IS_ERR(em)) {
4467 0 : err = PTR_ERR(em);
4468 : em = NULL;
4469 0 : break;
4470 : }
4471 5508 : last_byte = min(extent_map_end(em), block_end);
4472 5508 : last_byte = ALIGN(last_byte , root->sectorsize);
4473 5508 : if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
4474 : struct extent_map *hole_em;
4475 5031 : hole_size = last_byte - cur_offset;
4476 :
4477 5031 : err = maybe_insert_hole(root, inode, cur_offset,
4478 : hole_size);
4479 5031 : if (err)
4480 : break;
4481 5031 : btrfs_drop_extent_cache(inode, cur_offset,
4482 : cur_offset + hole_size - 1, 0);
4483 5031 : hole_em = alloc_extent_map();
4484 5031 : if (!hole_em) {
4485 : set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
4486 : &BTRFS_I(inode)->runtime_flags);
4487 : goto next;
4488 : }
4489 5031 : hole_em->start = cur_offset;
4490 5031 : hole_em->len = hole_size;
4491 5031 : hole_em->orig_start = cur_offset;
4492 :
4493 5031 : hole_em->block_start = EXTENT_MAP_HOLE;
4494 5031 : hole_em->block_len = 0;
4495 5031 : hole_em->orig_block_len = 0;
4496 5031 : hole_em->ram_bytes = hole_size;
4497 5031 : hole_em->bdev = root->fs_info->fs_devices->latest_bdev;
4498 5031 : hole_em->compress_type = BTRFS_COMPRESS_NONE;
4499 5031 : hole_em->generation = root->fs_info->generation;
4500 :
4501 : while (1) {
4502 5031 : write_lock(&em_tree->lock);
4503 5031 : err = add_extent_mapping(em_tree, hole_em, 1);
4504 : write_unlock(&em_tree->lock);
4505 5031 : if (err != -EEXIST)
4506 : break;
4507 0 : btrfs_drop_extent_cache(inode, cur_offset,
4508 : cur_offset +
4509 : hole_size - 1, 0);
4510 0 : }
4511 5031 : free_extent_map(hole_em);
4512 : }
4513 : next:
4514 5508 : free_extent_map(em);
4515 : em = NULL;
4516 : cur_offset = last_byte;
4517 5508 : if (cur_offset >= block_end)
4518 : break;
4519 : }
4520 4954 : free_extent_map(em);
4521 4954 : unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state,
4522 : GFP_NOFS);
4523 4954 : return err;
4524 : }
4525 :
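      : /*
      : * handle the ATTR_SIZE part of a setattr: growing files get hole
      : * extents via btrfs_cont_expand(), shrinking files get an orphan
      : * item for safety and then go through btrfs_truncate()
      : */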
4526 7364 : static int btrfs_setsize(struct inode *inode, struct iattr *attr)
4527 : {
4528 3682 : struct btrfs_root *root = BTRFS_I(inode)->root;
4529 : struct btrfs_trans_handle *trans;
4530 : loff_t oldsize = i_size_read(inode);
4531 3682 : loff_t newsize = attr->ia_size;
4532 3682 : int mask = attr->ia_valid;
4533 : int ret;
4534 :
4535 : /*
4536 : * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a
4537 : * special case where we need to update the times despite not having
4538 : * these flags set. For all other operations the VFS sets these flags
4539 : * explicitly if it wants a timestamp update.
4540 : */
4541 3682 : if (newsize != oldsize) {
4542 : inode_inc_iversion(inode);
4543 1773 : if (!(mask & (ATTR_CTIME | ATTR_MTIME)))
4544 1304 : inode->i_ctime = inode->i_mtime =
4545 1304 : current_fs_time(inode->i_sb);
4546 : }
4547 :
4548 3682 : if (newsize > oldsize) {
4549 959 : truncate_pagecache(inode, newsize);
4550 959 : ret = btrfs_cont_expand(inode, oldsize, newsize);
4551 959 : if (ret)
4552 : return ret;
4553 :
4554 959 : trans = btrfs_start_transaction(root, 1);
4555 959 : if (IS_ERR(trans))
4556 0 : return PTR_ERR(trans);
4557 :
4558 : i_size_write(inode, newsize);
4559 959 : btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL);
4560 959 : ret = btrfs_update_inode(trans, root, inode);
4561 959 : btrfs_end_transaction(trans, root);
4562 : } else {
4563 :
4564 : /*
4565 : * We're truncating a file that used to have good data down to
4566 : * zero. Make sure it gets into the ordered flush list so that
4567 : * any new writes get down to disk quickly.
4568 : */
4569 2723 : if (newsize == 0)
4570 : set_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
4571 : &BTRFS_I(inode)->runtime_flags);
4572 :
4573 : /*
4574 : * 1 for the orphan item we're going to add
4575 : * 1 for the orphan item deletion.
4576 : */
4577 2723 : trans = btrfs_start_transaction(root, 2);
4578 2723 : if (IS_ERR(trans))
4579 0 : return PTR_ERR(trans);
4580 :
4581 : /*
4582 : * We need to do this in case we fail at _any_ point during the
4583 : * actual truncate. Once we do the truncate_setsize we could
4584 : * invalidate pages, which forces any outstanding ordered io to
4585 : * be instantly completed, which will give us extents that need
4586 : * to be truncated. If we fail to add the orphan item we
4587 : * could have left over extents that were never meant to live,
4588 : * so we need to guarantee from this point on that everything
4589 : * will be consistent.
4590 : */
4591 2723 : ret = btrfs_orphan_add(trans, inode);
4592 2723 : btrfs_end_transaction(trans, root);
4593 2723 : if (ret)
4594 : return ret;
4595 :
4596 : /* we don't support swapfiles, so vmtruncate shouldn't fail */
4597 2723 : truncate_setsize(inode, newsize);
4598 :
4599 : /* Disable nonlocked read DIO to avoid the end less truncate */
4600 : 	/* Disable nonlocked read DIO to avoid the endless truncate */
4601 2723 : inode_dio_wait(inode);
4602 : btrfs_inode_resume_unlocked_dio(inode);
4603 :
4604 2723 : ret = btrfs_truncate(inode);
4605 2723 : if (ret && inode->i_nlink) {
4606 : int err;
4607 :
4608 : /*
4609 : * failed to truncate, disk_i_size is only adjusted down
4610 : * as we remove extents, so it should represent the true
4611 : * size of the inode; reset the in-memory size and
4612 : * delete our orphan entry.
4613 : */
4614 0 : trans = btrfs_join_transaction(root);
4615 0 : if (IS_ERR(trans)) {
4616 0 : btrfs_orphan_del(NULL, inode);
4617 : return ret;
4618 : }
4619 0 : i_size_write(inode, BTRFS_I(inode)->disk_i_size);
4620 0 : err = btrfs_orphan_del(trans, inode);
4621 0 : if (err)
4622 0 : btrfs_abort_transaction(trans, root, err);
4623 0 : btrfs_end_transaction(trans, root);
4624 : }
4625 : }
4626 :
4627 : return ret;
4628 : }
4629 :
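      : /*
      : * the ->setattr callback: size changes go through btrfs_setsize(),
      : * everything else is copied into the inode and written back via
      : * btrfs_dirty_inode()
      : */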
4630 11752 : static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
4631 : {
4632 8070 : struct inode *inode = dentry->d_inode;
4633 8070 : struct btrfs_root *root = BTRFS_I(inode)->root;
4634 : int err;
4635 :
4636 8070 : if (btrfs_root_readonly(root))
4637 : return -EROFS;
4638 :
4639 8070 : err = inode_change_ok(inode, attr);
4640 8070 : if (err)
4641 : return err;
4642 :
4643 8070 : if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
4644 3682 : err = btrfs_setsize(inode, attr);
4645 3682 : if (err)
4646 : return err;
4647 : }
4648 :
4649 8070 : if (attr->ia_valid) {
4650 8070 : setattr_copy(inode, attr);
4651 : inode_inc_iversion(inode);
4652 8070 : err = btrfs_dirty_inode(inode);
4653 :
4654 8070 : if (!err && attr->ia_valid & ATTR_MODE)
4655 241 : err = posix_acl_chmod(inode, inode->i_mode);
4656 : }
4657 :
4658 8070 : return err;
4659 : }
4660 :
4661 : /*
4662 : * While truncating the inode pages during eviction, we get the VFS calling
4663 : * btrfs_invalidatepage() against each page of the inode. This is slow because
4664 : * the calls to btrfs_invalidatepage() result in a huge number of calls to
4665 : * lock_extent_bits() and clear_extent_bit(), which keep merging and splitting
4666 : * extent_state structures over and over, wasting lots of time.
4667 : *
4668 : * Therefore if the inode is being evicted, let btrfs_invalidatepage() skip all
4669 : * those expensive operations on a per page basis and do only the ordered io
4670 : * finishing, while we release the extent_map and extent_state structures here,
4671 : * without the excessive merging and splitting.
4672 : */
4673 25704 : static void evict_inode_truncate_pages(struct inode *inode)
4674 : {
4675 25704 : struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
4676 25704 : struct extent_map_tree *map_tree = &BTRFS_I(inode)->extent_tree;
4677 : struct rb_node *node;
4678 :
4679 : ASSERT(inode->i_state & I_FREEING);
4680 25704 : truncate_inode_pages_final(&inode->i_data);
4681 :
4682 25704 : write_lock(&map_tree->lock);
4683 126766 : while (!RB_EMPTY_ROOT(&map_tree->map)) {
4684 : struct extent_map *em;
4685 :
4686 75358 : node = rb_first(&map_tree->map);
4687 : em = rb_entry(node, struct extent_map, rb_node);
4688 : clear_bit(EXTENT_FLAG_PINNED, &em->flags);
4689 : clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
4690 75358 : remove_extent_mapping(map_tree, em);
4691 75358 : free_extent_map(em);
4692 75358 : if (need_resched()) {
4693 : write_unlock(&map_tree->lock);
4694 7 : cond_resched();
4695 7 : write_lock(&map_tree->lock);
4696 : }
4697 : }
4698 : write_unlock(&map_tree->lock);
4699 :
4700 : spin_lock(&io_tree->lock);
4701 42529 : while (!RB_EMPTY_ROOT(&io_tree->state)) {
4702 : struct extent_state *state;
4703 16825 : struct extent_state *cached_state = NULL;
4704 :
4705 16825 : node = rb_first(&io_tree->state);
4706 16825 : state = rb_entry(node, struct extent_state, rb_node);
4707 16825 : atomic_inc(&state->refs);
4708 : spin_unlock(&io_tree->lock);
4709 :
4710 16825 : lock_extent_bits(io_tree, state->start, state->end,
4711 : 0, &cached_state);
4712 16825 : clear_extent_bit(io_tree, state->start, state->end,
4713 : EXTENT_LOCKED | EXTENT_DIRTY |
4714 : EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
4715 : EXTENT_DEFRAG, 1, 1,
4716 : &cached_state, GFP_NOFS);
4717 16825 : free_extent_state(state);
4718 :
4719 16825 : cond_resched();
4720 : spin_lock(&io_tree->lock);
4721 : }
4722 : spin_unlock(&io_tree->lock);
4723 25704 : }
4724 :
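      : /*
      : * final iput path: drop the page cache and, if the inode is really
      : * being deleted, truncate away all of its items and remove the
      : * orphan item that protected it
      : */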
4725 25704 : void btrfs_evict_inode(struct inode *inode)
4726 : {
4727 : struct btrfs_trans_handle *trans;
4728 25704 : struct btrfs_root *root = BTRFS_I(inode)->root;
4729 : struct btrfs_block_rsv *rsv, *global_rsv;
4730 : u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
4731 : int ret;
4732 :
4733 25704 : trace_btrfs_inode_evict(inode);
4734 :
4735 25704 : evict_inode_truncate_pages(inode);
4736 :
4737 44501 : if (inode->i_nlink &&
4738 18742 : ((btrfs_root_refs(&root->root_item) != 0 &&
4739 19365 : root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID) ||
4740 623 : btrfs_is_free_space_inode(inode)))
4741 : goto no_delete;
4742 :
4743 7183 : if (is_bad_inode(inode)) {
4744 0 : btrfs_orphan_del(NULL, inode);
4745 0 : goto no_delete;
4746 : }
4747 : /* do we really want it for ->i_nlink > 0 and zero btrfs_root_refs? */
4748 7183 : btrfs_wait_ordered_range(inode, 0, (u64)-1);
4749 :
4750 7183 : if (root->fs_info->log_root_recovering) {
4751 0 : BUG_ON(test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
4752 : &BTRFS_I(inode)->runtime_flags));
4753 : goto no_delete;
4754 : }
4755 :
4756 7183 : if (inode->i_nlink > 0) {
4757 276 : BUG_ON(btrfs_root_refs(&root->root_item) != 0 &&
4758 : root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID);
4759 : goto no_delete;
4760 : }
4761 :
4762 6907 : ret = btrfs_commit_inode_delayed_inode(inode);
4763 6907 : if (ret) {
4764 0 : btrfs_orphan_del(NULL, inode);
4765 0 : goto no_delete;
4766 : }
4767 :
4768 6907 : rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP);
4769 6907 : if (!rsv) {
4770 0 : btrfs_orphan_del(NULL, inode);
4771 0 : goto no_delete;
4772 : }
4773 6907 : rsv->size = min_size;
4774 6907 : rsv->failfast = 1;
4775 6907 : global_rsv = &root->fs_info->global_block_rsv;
4776 :
4777 : btrfs_i_size_write(inode, 0);
4778 :
4779 : /*
4780 : * This is a bit simpler than btrfs_truncate since we've already
4781 : * reserved our space for our orphan item in the unlink, so we just
4782 : * need to reserve some slack space in case we add bytes and update
4783 : * the inode item when doing the truncate.
4784 : */
4785 : while (1) {
4786 6907 : ret = btrfs_block_rsv_refill(root, rsv, min_size,
4787 : BTRFS_RESERVE_FLUSH_LIMIT);
4788 :
4789 : /*
4790 : * Try to steal from the global reserve since we will
4791 : * likely not use this space anyway; we want to try as
4792 : * hard as possible to get this to work.
4793 : */
4794 6907 : if (ret)
4795 0 : ret = btrfs_block_rsv_migrate(global_rsv, rsv, min_size);
4796 :
4797 6907 : if (ret) {
4798 0 : btrfs_warn(root->fs_info,
4799 : "Could not get space for a delete, will truncate on mount %d",
4800 : ret);
4801 0 : btrfs_orphan_del(NULL, inode);
4802 0 : btrfs_free_block_rsv(root, rsv);
4803 0 : goto no_delete;
4804 : }
4805 :
4806 6907 : trans = btrfs_join_transaction(root);
4807 6907 : if (IS_ERR(trans)) {
4808 0 : btrfs_orphan_del(NULL, inode);
4809 0 : btrfs_free_block_rsv(root, rsv);
4810 0 : goto no_delete;
4811 : }
4812 :
4813 6907 : trans->block_rsv = rsv;
4814 :
4815 6907 : ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
4816 6907 : if (ret != -ENOSPC)
4817 : break;
4818 :
4819 0 : trans->block_rsv = &root->fs_info->trans_block_rsv;
4820 0 : btrfs_end_transaction(trans, root);
4821 : trans = NULL;
4822 0 : btrfs_btree_balance_dirty(root);
4823 0 : }
4824 :
4825 6907 : btrfs_free_block_rsv(root, rsv);
4826 :
4827 : /*
4828 : * Errors here aren't a big deal; they just mean we leave orphan items
4829 : * in the tree. They will be cleaned up on the next mount.
4830 : */
4831 6907 : if (ret == 0) {
4832 6907 : trans->block_rsv = root->orphan_block_rsv;
4833 6907 : btrfs_orphan_del(trans, inode);
4834 : } else {
4835 0 : btrfs_orphan_del(NULL, inode);
4836 : }
4837 :
4838 6907 : trans->block_rsv = &root->fs_info->trans_block_rsv;
4839 13773 : if (!(root == root->fs_info->tree_root ||
4840 6866 : root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID))
4841 6866 : btrfs_return_ino(root, btrfs_ino(inode));
4842 :
4843 6907 : btrfs_end_transaction(trans, root);
4844 6907 : btrfs_btree_balance_dirty(root);
4845 : no_delete:
4846 25704 : btrfs_remove_delayed_node(inode);
4847 25704 : clear_inode(inode);
4848 25704 : return;
4849 : }
4850 :
4851 : /*
4852 : * this returns the key found in the dir entry in the location pointer.
4853 : * If no dir entries were found, location->objectid is 0.
4854 : */
4855 40768 : static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
4856 : struct btrfs_key *location)
4857 : {
4858 40768 : const char *name = dentry->d_name.name;
4859 40768 : int namelen = dentry->d_name.len;
4860 : struct btrfs_dir_item *di;
4861 : struct btrfs_path *path;
4862 40768 : struct btrfs_root *root = BTRFS_I(dir)->root;
4863 : int ret = 0;
4864 :
4865 40768 : path = btrfs_alloc_path();
4866 40769 : if (!path)
4867 : return -ENOMEM;
4868 :
4869 40769 : di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(dir), name,
4870 : namelen, 0);
4871 40768 : if (IS_ERR(di))
4872 0 : ret = PTR_ERR(di);
4873 :
4874 40768 : if (IS_ERR_OR_NULL(di))
4875 : goto out_err;
4876 :
4877 12304 : btrfs_dir_item_key_to_cpu(path->nodes[0], di, location);
4878 : out:
4879 40768 : btrfs_free_path(path);
4880 : return ret;
4881 : out_err:
4882 28464 : location->objectid = 0;
4883 : goto out;
4884 : }
4885 :
4886 : /*
4887 : * when we hit a tree root in a directory, the btrfs part of the inode
4888 : * needs to be changed to reflect the root directory of the tree root. This
4889 : * is kind of like crossing a mount point.
4890 : */
4891 205 : static int fixup_tree_root_location(struct btrfs_root *root,
4892 : struct inode *dir,
4893 : struct dentry *dentry,
4894 : struct btrfs_key *location,
4895 : struct btrfs_root **sub_root)
4896 : {
4897 : struct btrfs_path *path;
4898 : struct btrfs_root *new_root;
4899 : struct btrfs_root_ref *ref;
4900 : struct extent_buffer *leaf;
4901 : int ret;
4902 : int err = 0;
4903 :
4904 205 : path = btrfs_alloc_path();
4905 205 : if (!path) {
4906 : err = -ENOMEM;
4907 : goto out;
4908 : }
4909 :
4910 : err = -ENOENT;
4911 410 : ret = btrfs_find_item(root->fs_info->tree_root, path,
4912 205 : BTRFS_I(dir)->root->root_key.objectid,
4913 : location->objectid, BTRFS_ROOT_REF_KEY, NULL);
4914 205 : if (ret) {
4915 0 : if (ret < 0)
4916 : err = ret;
4917 : goto out;
4918 : }
4919 :
4920 205 : leaf = path->nodes[0];
4921 410 : ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
4922 410 : if (btrfs_root_ref_dirid(leaf, ref) != btrfs_ino(dir) ||
4923 205 : btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len)
4924 : goto out;
4925 :
4926 410 : ret = memcmp_extent_buffer(leaf, dentry->d_name.name,
4927 205 : (unsigned long)(ref + 1),
4928 : dentry->d_name.len);
4929 205 : if (ret)
4930 : goto out;
4931 :
4932 205 : btrfs_release_path(path);
4933 :
4934 205 : new_root = btrfs_read_fs_root_no_name(root->fs_info, location);
4935 205 : if (IS_ERR(new_root)) {
4936 0 : err = PTR_ERR(new_root);
4937 : goto out;
4938 : }
4939 :
4940 205 : *sub_root = new_root;
4941 205 : location->objectid = btrfs_root_dirid(&new_root->root_item);
4942 205 : location->type = BTRFS_INODE_ITEM_KEY;
4943 205 : location->offset = 0;
4944 : err = 0;
4945 : out:
4946 205 : btrfs_free_path(path);
4947 205 : return err;
4948 : }
4949 :
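      : /*
      : * add the inode to the per-root rbtree of in-memory inodes, keyed
      : * by inode number
      : */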
4950 25418 : static void inode_tree_add(struct inode *inode)
4951 : {
4952 25418 : struct btrfs_root *root = BTRFS_I(inode)->root;
4953 : struct btrfs_inode *entry;
4954 : struct rb_node **p;
4955 : struct rb_node *parent;
4956 25418 : struct rb_node *new = &BTRFS_I(inode)->rb_node;
4957 : u64 ino = btrfs_ino(inode);
4958 :
4959 25418 : if (inode_unhashed(inode))
4960 : return;
4961 : parent = NULL;
4962 : spin_lock(&root->inode_lock);
4963 25418 : p = &root->inode_tree.rb_node;
4964 377160 : while (*p) {
4965 : parent = *p;
4966 : entry = rb_entry(parent, struct btrfs_inode, rb_node);
4967 :
4968 326324 : if (ino < btrfs_ino(&entry->vfs_inode))
4969 9541 : p = &parent->rb_left;
4970 316783 : else if (ino > btrfs_ino(&entry->vfs_inode))
4971 316783 : p = &parent->rb_right;
4972 : else {
4973 0 : WARN_ON(!(entry->vfs_inode.i_state &
4974 : (I_WILL_FREE | I_FREEING)));
4975 0 : rb_replace_node(parent, new, &root->inode_tree);
4976 0 : RB_CLEAR_NODE(parent);
4977 : spin_unlock(&root->inode_lock);
4978 : return;
4979 : }
4980 : }
4981 : rb_link_node(new, parent, p);
4982 25418 : rb_insert_color(new, &root->inode_tree);
4983 : spin_unlock(&root->inode_lock);
4984 : }
4985 :
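      : /*
      : * remove the inode from the per-root rbtree; if that leaves the
      : * tree empty and the root has no more refs, the root can be queued
      : * as a dead root
      : */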
4986 25704 : static void inode_tree_del(struct inode *inode)
4987 : {
4988 25704 : struct btrfs_root *root = BTRFS_I(inode)->root;
4989 : int empty = 0;
4990 :
4991 : spin_lock(&root->inode_lock);
4992 25704 : if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) {
4993 25411 : rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree);
4994 25411 : RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
4995 25411 : empty = RB_EMPTY_ROOT(&root->inode_tree);
4996 : }
4997 : spin_unlock(&root->inode_lock);
4998 :
4999 26392 : if (empty && btrfs_root_refs(&root->root_item) == 0) {
5000 33 : synchronize_srcu(&root->fs_info->subvol_srcu);
5001 : spin_lock(&root->inode_lock);
5002 33 : empty = RB_EMPTY_ROOT(&root->inode_tree);
5003 : spin_unlock(&root->inode_lock);
5004 33 : if (empty)
5005 33 : btrfs_add_dead_root(root);
5006 : }
5007 25704 : }
5008 :
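      : /*
      : * walk the per-root rbtree and drop the dcache aliases of every
      : * in-memory inode so that a deleted (or errored) root can be torn
      : * down
      : */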
5009 33 : void btrfs_invalidate_inodes(struct btrfs_root *root)
5010 : {
5011 : struct rb_node *node;
5012 : struct rb_node *prev;
5013 : struct btrfs_inode *entry;
5014 : struct inode *inode;
5015 : u64 objectid = 0;
5016 :
5017 66 : if (!test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
5018 33 : WARN_ON(btrfs_root_refs(&root->root_item) != 0);
5019 :
5020 : spin_lock(&root->inode_lock);
5021 : again:
5022 88 : node = root->inode_tree.rb_node;
5023 : prev = NULL;
5024 258 : while (node) {
5025 : prev = node;
5026 : entry = rb_entry(node, struct btrfs_inode, rb_node);
5027 :
5028 100 : if (objectid < btrfs_ino(&entry->vfs_inode))
5029 42 : node = node->rb_left;
5030 58 : else if (objectid > btrfs_ino(&entry->vfs_inode))
5031 40 : node = node->rb_right;
5032 : else
5033 : break;
5034 : }
5035 88 : if (!node) {
5036 107 : while (prev) {
5037 : entry = rb_entry(prev, struct btrfs_inode, rb_node);
5038 74 : if (objectid <= btrfs_ino(&entry->vfs_inode)) {
5039 : node = prev;
5040 : break;
5041 : }
5042 37 : prev = rb_next(prev);
5043 : }
5044 : }
5045 88 : while (node) {
5046 : entry = rb_entry(node, struct btrfs_inode, rb_node);
5047 55 : objectid = btrfs_ino(&entry->vfs_inode) + 1;
5048 55 : inode = igrab(&entry->vfs_inode);
5049 55 : if (inode) {
5050 : spin_unlock(&root->inode_lock);
5051 55 : if (atomic_read(&inode->i_count) > 1)
5052 33 : d_prune_aliases(inode);
5053 : /*
5054 : * btrfs_drop_inode will have it removed from
5055 : * the inode cache when its usage count
5056 : * hits zero.
5057 : */
5058 55 : iput(inode);
5059 55 : cond_resched();
5060 : spin_lock(&root->inode_lock);
5061 : goto again;
5062 : }
5063 :
5064 0 : if (cond_resched_lock(&root->inode_lock))
5065 : goto again;
5066 :
5067 0 : node = rb_next(node);
5068 : }
5069 : spin_unlock(&root->inode_lock);
5070 33 : }
5071 :
5072 5018 : static int btrfs_init_locked_inode(struct inode *inode, void *p)
5073 : {
5074 : struct btrfs_iget_args *args = p;
5075 5018 : inode->i_ino = args->location->objectid;
5076 5018 : memcpy(&BTRFS_I(inode)->location, args->location,
5077 : sizeof(*args->location));
5078 5018 : BTRFS_I(inode)->root = args->root;
5079 5018 : return 0;
5080 : }
5081 :
5082 31256 : static int btrfs_find_actor(struct inode *inode, void *opaque)
5083 : {
5084 : struct btrfs_iget_args *args = opaque;
5085 62449 : return args->location->objectid == BTRFS_I(inode)->location.objectid &&
5086 31193 : args->root == BTRFS_I(inode)->root;
5087 : }
5088 :
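      : /*
      : * find or allocate the in-memory inode for (location, root) in the
      : * inode hash, using the two callbacks above to match and initialize
      : * entries
      : */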
5089 36211 : static struct inode *btrfs_iget_locked(struct super_block *s,
5090 : struct btrfs_key *location,
5091 36211 : struct btrfs_root *root)
5092 : {
5093 : struct inode *inode;
5094 : struct btrfs_iget_args args;
5095 36211 : unsigned long hashval = btrfs_inode_hash(location->objectid, root);
5096 :
5097 36211 : args.location = location;
5098 36211 : args.root = root;
5099 :
5100 36211 : inode = iget5_locked(s, hashval, btrfs_find_actor,
5101 : btrfs_init_locked_inode,
5102 : (void *)&args);
5103 36211 : return inode;
5104 : }
5105 :
5106 : /* Get an inode object given its location and corresponding root.
5107 : * Returns in *new whether the inode was read from disk.
5108 : */
5109 36211 : struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
5110 : struct btrfs_root *root, int *new)
5111 : {
5112 : struct inode *inode;
5113 :
5114 36211 : inode = btrfs_iget_locked(s, location, root);
5115 36211 : if (!inode)
5116 : return ERR_PTR(-ENOMEM);
5117 :
5118 36211 : if (inode->i_state & I_NEW) {
5119 5018 : btrfs_read_locked_inode(inode);
5120 5018 : if (!is_bad_inode(inode)) {
5121 4946 : inode_tree_add(inode);
5122 4946 : unlock_new_inode(inode);
5123 4946 : if (new)
5124 3 : *new = 1;
5125 : } else {
5126 72 : unlock_new_inode(inode);
5127 72 : iput(inode);
5128 : inode = ERR_PTR(-ESTALE);
5129 : }
5130 : }
5131 :
5132 36211 : return inode;
5133 : }
5134 :
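 : /*
 :  * Caller-side sketch (assumed context, mirroring how btrfs_lookup_dentry()
 :  * below builds the key once btrfs_inode_by_name() resolves a name): an
 :  * inode number plus its root is enough to fetch the inode, with -ENOMEM
 :  * or -ESTALE coming back as an ERR_PTR on failure.
 :  *
 :  *	struct btrfs_key key;
 :  *	struct inode *inode;
 :  *
 :  *	key.objectid = ino;
 :  *	key.type = BTRFS_INODE_ITEM_KEY;
 :  *	key.offset = 0;
 :  *	inode = btrfs_iget(sb, &key, root, NULL);
 :  *	if (IS_ERR(inode))
 :  *		return PTR_ERR(inode);
 :  */
 :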
5135 0 : static struct inode *new_simple_dir(struct super_block *s,
5136 : struct btrfs_key *key,
5137 : struct btrfs_root *root)
5138 : {
5139 0 : struct inode *inode = new_inode(s);
5140 :
5141 0 : if (!inode)
5142 : return ERR_PTR(-ENOMEM);
5143 :
5144 0 : BTRFS_I(inode)->root = root;
5145 0 : memcpy(&BTRFS_I(inode)->location, key, sizeof(*key));
5146 : set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags);
5147 :
5148 0 : inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID;
5149 0 : inode->i_op = &btrfs_dir_ro_inode_operations;
5150 0 : inode->i_fop = &simple_dir_operations;
5151 0 : inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
5152 0 : inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
5153 :
5154 0 : return inode;
5155 : }
5156 :
5157 81537 : struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
5158 : {
5159 : struct inode *inode;
5160 40768 : struct btrfs_root *root = BTRFS_I(dir)->root;
5161 40768 : struct btrfs_root *sub_root = root;
5162 : struct btrfs_key location;
5163 : int index;
5164 : int ret = 0;
5165 :
5166 40768 : if (dentry->d_name.len > BTRFS_NAME_LEN)
5167 : return ERR_PTR(-ENAMETOOLONG);
5168 :
5169 40769 : ret = btrfs_inode_by_name(dir, dentry, &location);
5170 40769 : if (ret < 0)
5171 0 : return ERR_PTR(ret);
5172 :
5173 40769 : if (location.objectid == 0)
5174 : return ERR_PTR(-ENOENT);
5175 :
5176 12304 : if (location.type == BTRFS_INODE_ITEM_KEY) {
5177 12099 : inode = btrfs_iget(dir->i_sb, &location, root, NULL);
5178 12099 : return inode;
5179 : }
5180 :
5181 205 : BUG_ON(location.type != BTRFS_ROOT_ITEM_KEY);
5182 :
5183 205 : index = srcu_read_lock(&root->fs_info->subvol_srcu);
5184 205 : ret = fixup_tree_root_location(root, dir, dentry,
5185 : &location, &sub_root);
5186 205 : if (ret < 0) {
5187 0 : if (ret != -ENOENT)
5188 0 : inode = ERR_PTR(ret);
5189 : else
5190 0 : inode = new_simple_dir(dir->i_sb, &location, sub_root);
5191 : } else {
5192 205 : inode = btrfs_iget(dir->i_sb, &location, sub_root, NULL);
5193 : }
5194 205 : srcu_read_unlock(&root->fs_info->subvol_srcu, index);
5195 :
5196 205 : if (!IS_ERR(inode) && root != sub_root) {
5197 205 : down_read(&root->fs_info->cleanup_work_sem);
5198 205 : if (!(inode->i_sb->s_flags & MS_RDONLY))
5199 205 : ret = btrfs_orphan_cleanup(sub_root);
5200 205 : up_read(&root->fs_info->cleanup_work_sem);
5201 205 : if (ret) {
5202 0 : iput(inode);
5203 0 : inode = ERR_PTR(ret);
5204 : }
5205 : /*
5206 : * If orphan cleanup did remove any orphans, it means the tree
5207 : * was modified and therefore the commit root is not the same as
5208 : * the current root anymore. This is a problem, because send
5209 : * uses the commit root and therefore can see inode items that
5210 : * don't exist in the current root anymore, and for example make
5211 : * calls to btrfs_iget, which will do tree lookups based on the
5212 : * current root and not on the commit root. Those lookups will
5213 : * fail, returning a -ESTALE error, and making send fail with
5214 : * that error. So make sure a send does not see any orphans we
5215 : * have just removed, and that it will see the same inodes
5216 : * regardless of whether a transaction commit happened before
5217 : * it started (meaning that the commit root will be the same as
5218 : * the current root) or not.
5219 : */
5220 205 : if (sub_root->node != sub_root->commit_root) {
5221 : u64 sub_flags = btrfs_root_flags(&sub_root->root_item);
5222 :
5223 6 : if (sub_flags & BTRFS_ROOT_SUBVOL_RDONLY) {
5224 : struct extent_buffer *eb;
5225 :
5226 : /*
5227 : * Assert we can't have races between dentry
5228 : * lookup called through the snapshot creation
5229 : * ioctl and the VFS.
5230 : */
5231 : ASSERT(mutex_is_locked(&dir->i_mutex));
5232 :
5233 1 : down_write(&root->fs_info->commit_root_sem);
5234 1 : eb = sub_root->commit_root;
5235 1 : sub_root->commit_root =
5236 1 : btrfs_root_node(sub_root);
5237 1 : up_write(&root->fs_info->commit_root_sem);
5238 1 : free_extent_buffer(eb);
5239 : }
5240 : }
5241 : }
5242 :
5243 205 : return inode;
5244 : }
5245 :
5246 1454590 : static int btrfs_dentry_delete(const struct dentry *dentry)
5247 : {
5248 : struct btrfs_root *root;
5249 1454590 : struct inode *inode = dentry->d_inode;
5250 :
5251 1454590 : if (!inode && !IS_ROOT(dentry))
5252 24597 : inode = dentry->d_parent->d_inode;
5253 :
5254 1454590 : if (inode) {
5255 1454595 : root = BTRFS_I(inode)->root;
5256 1454595 : if (btrfs_root_refs(&root->root_item) == 0)
5257 : return 1;
5258 :
5259 1454600 : if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
5260 : return 1;
5261 : }
5262 1454613 : return 0;
5263 : }
5264 :
5265 39072 : static void btrfs_dentry_release(struct dentry *dentry)
5266 : {
5267 39072 : kfree(dentry->d_fsdata);
5268 39072 : }
5269 :
5270 40574 : static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
5271 : unsigned int flags)
5272 : {
5273 : struct inode *inode;
5274 :
5275 40574 : inode = btrfs_lookup_dentry(dir, dentry);
5276 40574 : if (IS_ERR(inode)) {
5277 28466 : if (PTR_ERR(inode) == -ENOENT)
5278 : inode = NULL;
5279 : else
5280 : return ERR_CAST(inode);
5281 : }
5282 :
5283 40574 : return d_materialise_unique(dentry, inode);
5284 : }
5285 :
5286 : unsigned char btrfs_filetype_table[] = {
5287 : DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
5288 : };
5289 :
5290 22061 : static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
5291 : {
5292 : struct inode *inode = file_inode(file);
5293 22061 : struct btrfs_root *root = BTRFS_I(inode)->root;
5294 : struct btrfs_item *item;
5295 : struct btrfs_dir_item *di;
5296 : struct btrfs_key key;
5297 : struct btrfs_key found_key;
5298 : struct btrfs_path *path;
5299 : struct list_head ins_list;
5300 : struct list_head del_list;
5301 : int ret;
5302 169847 : struct extent_buffer *leaf;
5303 : int slot;
5304 : unsigned char d_type;
5305 : int over = 0;
5306 : u32 di_cur;
5307 : u32 di_total;
5308 : u32 di_len;
5309 : int key_type = BTRFS_DIR_INDEX_KEY;
5310 : char tmp_name[32];
5311 : char *name_ptr;
5312 : int name_len;
5313 : int is_curr = 0; /* ctx->pos points to the current index? */
5314 :
5315 : /* FIXME, use a real flag for deciding about the key type */
5316 22061 : if (root->fs_info->tree_root == root)
5317 : key_type = BTRFS_DIR_ITEM_KEY;
5318 :
5319 22061 : if (!dir_emit_dots(file, ctx))
5320 : return 0;
5321 :
5322 22061 : path = btrfs_alloc_path();
5323 22061 : if (!path)
5324 : return -ENOMEM;
5325 :
5326 22061 : path->reada = 1;
5327 :
5328 22061 : if (key_type == BTRFS_DIR_INDEX_KEY) {
5329 : INIT_LIST_HEAD(&ins_list);
5330 : INIT_LIST_HEAD(&del_list);
5331 22061 : btrfs_get_delayed_items(inode, &ins_list, &del_list);
5332 : }
5333 :
5334 22061 : btrfs_set_key_type(&key, key_type);
5335 22061 : key.offset = ctx->pos;
5336 22061 : key.objectid = btrfs_ino(inode);
5337 :
5338 22061 : ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5339 22061 : if (ret < 0)
5340 : goto err;
5341 :
5342 : while (1) {
5343 169847 : leaf = path->nodes[0];
5344 169847 : slot = path->slots[0];
5345 339694 : if (slot >= btrfs_header_nritems(leaf)) {
5346 1392 : ret = btrfs_next_leaf(root, path);
5347 1392 : if (ret < 0)
5348 : goto err;
5349 1392 : else if (ret > 0)
5350 : break;
5351 1313 : continue;
5352 : }
5353 :
5354 : item = btrfs_item_nr(slot);
5355 168455 : btrfs_item_key_to_cpu(leaf, &found_key, slot);
5356 :
5357 168455 : if (found_key.objectid != key.objectid)
5358 : break;
5359 146477 : if (btrfs_key_type(&found_key) != key_type)
5360 : break;
5361 146477 : if (found_key.offset < ctx->pos)
5362 : goto next;
5363 292954 : if (key_type == BTRFS_DIR_INDEX_KEY &&
5364 146477 : btrfs_should_delete_dir_index(&del_list,
5365 : found_key.offset))
5366 : goto next;
5367 :
5368 146360 : ctx->pos = found_key.offset;
5369 : is_curr = 1;
5370 :
5371 146360 : di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
5372 : di_cur = 0;
5373 : di_total = btrfs_item_size(leaf, item);
5374 :
5375 146360 : while (di_cur < di_total) {
5376 : struct btrfs_key location;
5377 :
5378 146360 : if (verify_dir_item(root, leaf, di))
5379 : break;
5380 :
5381 146360 : name_len = btrfs_dir_name_len(leaf, di);
5382 146360 : if (name_len <= sizeof(tmp_name)) {
5383 : name_ptr = tmp_name;
5384 : } else {
5385 523 : name_ptr = kmalloc(name_len, GFP_NOFS);
5386 523 : if (!name_ptr) {
5387 : ret = -ENOMEM;
5388 0 : goto err;
5389 : }
5390 : }
5391 292720 : read_extent_buffer(leaf, name_ptr,
5392 146360 : (unsigned long)(di + 1), name_len);
5393 :
5394 146360 : d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
5395 146360 : btrfs_dir_item_key_to_cpu(leaf, di, &location);
5396 :
5397 :
5398 : /* is this a reference to our own snapshot? If so
5399 : * skip it.
5400 : *
5401 : * In contrast to old kernels, we insert the snapshot's
5402 : * dir item and dir index after it has been created, so
5403 : * we won't find a reference to our own snapshot. We
5404 : * still keep the following code for backward
5405 : * compatibility.
5406 : */
5407 146382 : if (location.type == BTRFS_ROOT_ITEM_KEY &&
5408 22 : location.objectid == root->root_key.objectid) {
5409 : over = 0;
5410 : goto skip;
5411 : }
5412 292720 : over = !dir_emit(ctx, name_ptr, name_len,
5413 : location.objectid, d_type);
5414 :
5415 : skip:
5416 146360 : if (name_ptr != tmp_name)
5417 523 : kfree(name_ptr);
5418 :
5419 146360 : if (over)
5420 : goto nopos;
5421 292712 : di_len = btrfs_dir_name_len(leaf, di) +
5422 : btrfs_dir_data_len(leaf, di) + sizeof(*di);
5423 146356 : di_cur += di_len;
5424 146356 : di = (struct btrfs_dir_item *)((char *)di + di_len);
5425 : }
5426 : next:
5427 146473 : path->slots[0]++;
5428 : }
5429 :
5430 22057 : if (key_type == BTRFS_DIR_INDEX_KEY) {
5431 22057 : if (is_curr)
5432 11155 : ctx->pos++;
5433 22057 : ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list);
5434 22057 : if (ret)
5435 : goto nopos;
5436 : }
5437 :
5438 : /* Reached end of directory/root. Bump pos past the last item. */
5439 22057 : ctx->pos++;
5440 :
5441 : /*
5442 : * Stop new entries from being returned after we return the last
5443 : * entry.
5444 : *
5445 : * New directory entries are assigned a strictly increasing
5446 : * offset. This means that new entries created during readdir
5447 : * are *guaranteed* to be seen in the future by that readdir.
5448 : * This has broken buggy programs which operate on names as
5449 : * they're returned by readdir. Until we re-use freed offsets
5450 : * we have this hack to stop new entries from being returned
5451 : * under the assumption that they'll never reach this huge
5452 : * offset.
5453 : *
5454 : * This is being careful not to overflow 32bit loff_t unless the
5455 : * last entry requires it because doing so has broken 32bit apps
5456 : * in the past.
5457 : */
5458 22057 : if (key_type == BTRFS_DIR_INDEX_KEY) {
5459 22057 : if (ctx->pos >= INT_MAX)
5460 10479 : ctx->pos = LLONG_MAX;
5461 : else
5462 11578 : ctx->pos = INT_MAX;
5463 : }
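 : /*
 :  * Worked example of the pinning above: a first readdir pass ends with
 :  * ctx->pos set to INT_MAX; if the same open directory is read again,
 :  * pos is already >= INT_MAX after the final increment, so it is pinned
 :  * at LLONG_MAX and entries created later (with ever-growing offsets)
 :  * are never returned to that reader.
 :  */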
5464 : nopos:
5465 : ret = 0;
5466 : err:
5467 22061 : if (key_type == BTRFS_DIR_INDEX_KEY)
5468 22061 : btrfs_put_delayed_items(&ins_list, &del_list);
5469 22061 : btrfs_free_path(path);
5470 22061 : return ret;
5471 : }
5472 :
5473 0 : int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
5474 : {
5475 0 : struct btrfs_root *root = BTRFS_I(inode)->root;
5476 : struct btrfs_trans_handle *trans;
5477 : int ret = 0;
5478 : bool nolock = false;
5479 :
5480 0 : if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags))
5481 : return 0;
5482 :
5483 0 : if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(inode))
5484 : nolock = true;
5485 :
5486 0 : if (wbc->sync_mode == WB_SYNC_ALL) {
5487 0 : if (nolock)
5488 0 : trans = btrfs_join_transaction_nolock(root);
5489 : else
5490 0 : trans = btrfs_join_transaction(root);
5491 0 : if (IS_ERR(trans))
5492 0 : return PTR_ERR(trans);
5493 0 : ret = btrfs_commit_transaction(trans, root);
5494 : }
5495 0 : return ret;
5496 : }
5497 :
5498 : /*
5499 : * This is somewhat expensive, updating the tree every time the
5500 : * inode changes. But it is most likely to find the inode in cache.
5501 : * FIXME: needs more benchmarking... there are no reasons other than performance
5502 : * to keep or drop this code.
5503 : */
5504 13216 : static int btrfs_dirty_inode(struct inode *inode)
5505 : {
5506 13216 : struct btrfs_root *root = BTRFS_I(inode)->root;
5507 : struct btrfs_trans_handle *trans;
5508 : int ret;
5509 :
5510 13216 : if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags))
5511 : return 0;
5512 :
5513 13216 : trans = btrfs_join_transaction(root);
5514 13216 : if (IS_ERR(trans))
5515 0 : return PTR_ERR(trans);
5516 :
5517 13216 : ret = btrfs_update_inode(trans, root, inode);
5518 13216 : if (ret && ret == -ENOSPC) {
5519 : /* whoops, let's try again with the full transaction */
5520 0 : btrfs_end_transaction(trans, root);
5521 0 : trans = btrfs_start_transaction(root, 1);
5522 0 : if (IS_ERR(trans))
5523 0 : return PTR_ERR(trans);
5524 :
5525 0 : ret = btrfs_update_inode(trans, root, inode);
5526 : }
5527 13216 : btrfs_end_transaction(trans, root);
5528 13216 : if (BTRFS_I(inode)->delayed_node)
5529 13216 : btrfs_balance_delayed_items(root);
5530 :
5531 13216 : return ret;
5532 : }
5533 :
5534 : /*
5535 : * This is a copy of file_update_time. We need this so we can return an error
5536 : * on ENOSPC when updating the inode for file writes and mmap writes.
5537 : */
5538 46157 : static int btrfs_update_time(struct inode *inode, struct timespec *now,
5539 : int flags)
5540 : {
5541 46157 : struct btrfs_root *root = BTRFS_I(inode)->root;
5542 :
5543 46157 : if (btrfs_root_readonly(root))
5544 : return -EROFS;
5545 :
5546 5146 : if (flags & S_VERSION)
5547 : inode_inc_iversion(inode);
5548 5146 : if (flags & S_CTIME)
5549 1544 : inode->i_ctime = *now;
5550 5146 : if (flags & S_MTIME)
5551 1544 : inode->i_mtime = *now;
5552 5146 : if (flags & S_ATIME)
5553 3602 : inode->i_atime = *now;
5554 5146 : return btrfs_dirty_inode(inode);
5555 : }
5556 :
5557 : /*
5558 : * find the highest existing sequence number in a directory
5559 : * and then set the in-memory index_cnt variable to reflect
5560 : * free sequence numbers
5561 : */
5562 185 : static int btrfs_set_inode_index_count(struct inode *inode)
5563 : {
5564 185 : struct btrfs_root *root = BTRFS_I(inode)->root;
5565 : struct btrfs_key key, found_key;
5566 : struct btrfs_path *path;
5567 : struct extent_buffer *leaf;
5568 : int ret;
5569 :
5570 185 : key.objectid = btrfs_ino(inode);
5571 : btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY);
5572 185 : key.offset = (u64)-1;
5573 :
5574 185 : path = btrfs_alloc_path();
5575 185 : if (!path)
5576 : return -ENOMEM;
5577 :
5578 185 : ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5579 185 : if (ret < 0)
5580 : goto out;
5581 : /* FIXME: we should be able to handle this */
5582 185 : if (ret == 0)
5583 : goto out;
5584 : ret = 0;
5585 :
5586 : /*
5587 : * MAGIC NUMBER EXPLANATION:
5588 : * we search a directory based on f_pos, and '.' and '..' have
5589 : * f_pos of 0 and 1 respectively, so every other entry has to start
5590 : * at 2
5591 : */
5592 185 : if (path->slots[0] == 0) {
5593 0 : BTRFS_I(inode)->index_cnt = 2;
5594 0 : goto out;
5595 : }
5596 :
5597 185 : path->slots[0]--;
5598 :
5599 185 : leaf = path->nodes[0];
5600 185 : btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
5601 :
5602 555 : if (found_key.objectid != btrfs_ino(inode) ||
5603 185 : btrfs_key_type(&found_key) != BTRFS_DIR_INDEX_KEY) {
5604 112 : BTRFS_I(inode)->index_cnt = 2;
5605 112 : goto out;
5606 : }
5607 :
5608 73 : BTRFS_I(inode)->index_cnt = found_key.offset + 1;
5609 : out:
5610 185 : btrfs_free_path(path);
5611 185 : return ret;
5612 : }
5613 :
5614 : /*
5615 : * helper to find a free sequence number in a given directory. The current
5616 : * code is very simple; later versions will do smarter things in the btree
5617 : */
5618 26501 : int btrfs_set_inode_index(struct inode *dir, u64 *index)
5619 : {
5620 : int ret = 0;
5621 :
5622 26501 : if (BTRFS_I(dir)->index_cnt == (u64)-1) {
5623 185 : ret = btrfs_inode_delayed_dir_index_count(dir);
5624 185 : if (ret) {
5625 185 : ret = btrfs_set_inode_index_count(dir);
5626 185 : if (ret)
5627 : return ret;
5628 : }
5629 : }
5630 :
5631 26501 : *index = BTRFS_I(dir)->index_cnt;
5632 26501 : BTRFS_I(dir)->index_cnt++;
5633 :
5634 26501 : return ret;
5635 : }
5636 :
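 : /*
 :  * Sketch of the usual create/link-path pairing (assumed context): the
 :  * caller takes the next free index for the parent directory and hands
 :  * it to btrfs_add_link(), exactly as btrfs_link() does further down.
 :  *
 :  *	u64 index;
 :  *	int err;
 :  *
 :  *	err = btrfs_set_inode_index(dir, &index);
 :  *	if (!err)
 :  *		err = btrfs_add_link(trans, dir, inode, name, name_len,
 :  *				     1, index);
 :  */
 :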
5637 20472 : static int btrfs_insert_inode_locked(struct inode *inode)
5638 : {
5639 : struct btrfs_iget_args args;
5640 20472 : args.location = &BTRFS_I(inode)->location;
5641 20472 : args.root = BTRFS_I(inode)->root;
5642 :
5643 40944 : return insert_inode_locked4(inode,
5644 20472 : btrfs_inode_hash(inode->i_ino, BTRFS_I(inode)->root),
5645 : btrfs_find_actor, &args);
5646 : }
5647 :
5648 40944 : static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
5649 : struct btrfs_root *root,
5650 : struct inode *dir,
5651 : const char *name, int name_len,
5652 : u64 ref_objectid, u64 objectid,
5653 : umode_t mode, u64 *index)
5654 : {
5655 : struct inode *inode;
5656 : struct btrfs_inode_item *inode_item;
5657 : struct btrfs_key *location;
5658 : struct btrfs_path *path;
5659 : struct btrfs_inode_ref *ref;
5660 : struct btrfs_key key[2];
5661 : u32 sizes[2];
5662 20472 : int nitems = name ? 2 : 1;
5663 : unsigned long ptr;
5664 : int ret;
5665 :
5666 20472 : path = btrfs_alloc_path();
5667 20472 : if (!path)
5668 : return ERR_PTR(-ENOMEM);
5669 :
5670 20472 : inode = new_inode(root->fs_info->sb);
5671 20472 : if (!inode) {
5672 0 : btrfs_free_path(path);
5673 0 : return ERR_PTR(-ENOMEM);
5674 : }
5675 :
5676 : /*
5677 : * For O_TMPFILE (no name), set the link count to 0, so that from this
5678 : * point on we fill in the inode item with the correct link count.
5679 : */
5680 20472 : if (!name)
5681 2 : set_nlink(inode, 0);
5682 :
5683 : /*
5684 : * we have to initialize this early, so we can reclaim the inode
5685 : * number if we fail afterwards in this function.
5686 : */
5687 20472 : inode->i_ino = objectid;
5688 :
5689 20472 : if (dir && name) {
5690 20421 : trace_btrfs_inode_request(dir);
5691 :
5692 20421 : ret = btrfs_set_inode_index(dir, index);
5693 20421 : if (ret) {
5694 0 : btrfs_free_path(path);
5695 0 : iput(inode);
5696 0 : return ERR_PTR(ret);
5697 : }
5698 51 : } else if (dir) {
5699 2 : *index = 0;
5700 : }
5701 : /*
5702 : * index_cnt is ignored for everything but a dir,
5703 : * btrfs_set_inode_index_count has an explanation for the magic
5704 : * number
5705 : */
5706 20472 : BTRFS_I(inode)->index_cnt = 2;
5707 20472 : BTRFS_I(inode)->dir_index = *index;
5708 20472 : BTRFS_I(inode)->root = root;
5709 20472 : BTRFS_I(inode)->generation = trans->transid;
5710 20472 : inode->i_generation = BTRFS_I(inode)->generation;
5711 :
5712 : /*
5713 : * We could have gotten an inode number from somebody who was fsynced
5714 : * and then removed in this same transaction, so let's just set full
5715 : * sync since it will be a full sync anyway and this will blow away the
5716 : * old info in the log.
5717 : */
5718 : set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
5719 :
5720 20472 : key[0].objectid = objectid;
5721 : btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY);
5722 20472 : key[0].offset = 0;
5723 :
5724 20472 : sizes[0] = sizeof(struct btrfs_inode_item);
5725 :
5726 20472 : if (name) {
5727 : /*
5728 : * Start new inodes with an inode_ref. This is slightly more
5729 : * efficient for small numbers of hard links since they will
5730 : * be packed into one item. Extended refs will kick in if we
5731 : * add more hard links than can fit in the ref item.
5732 : */
5733 20470 : key[1].objectid = objectid;
5734 : btrfs_set_key_type(&key[1], BTRFS_INODE_REF_KEY);
5735 20470 : key[1].offset = ref_objectid;
5736 :
5737 20470 : sizes[1] = name_len + sizeof(*ref);
5738 : }
5739 :
5740 : location = &BTRFS_I(inode)->location;
5741 20472 : location->objectid = objectid;
5742 20472 : location->offset = 0;
5743 : btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
5744 :
5745 20472 : ret = btrfs_insert_inode_locked(inode);
5746 20472 : if (ret < 0)
5747 : goto fail;
5748 :
5749 20472 : path->leave_spinning = 1;
5750 20472 : ret = btrfs_insert_empty_items(trans, root, path, key, sizes, nitems);
5751 20472 : if (ret != 0)
5752 : goto fail_unlock;
5753 :
5754 20472 : inode_init_owner(inode, dir, mode);
5755 20472 : inode_set_bytes(inode, 0);
5756 20472 : inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
5757 40944 : inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
5758 : struct btrfs_inode_item);
5759 20472 : memset_extent_buffer(path->nodes[0], 0, (unsigned long)inode_item,
5760 : sizeof(*inode_item));
5761 20472 : fill_inode_item(trans, path->nodes[0], inode_item, inode);
5762 :
5763 20472 : if (name) {
5764 40940 : ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
5765 : struct btrfs_inode_ref);
5766 20470 : btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
5767 20470 : btrfs_set_inode_ref_index(path->nodes[0], ref, *index);
5768 20470 : ptr = (unsigned long)(ref + 1);
5769 20470 : write_extent_buffer(path->nodes[0], name, ptr, name_len);
5770 : }
5771 :
5772 20472 : btrfs_mark_buffer_dirty(path->nodes[0]);
5773 20472 : btrfs_free_path(path);
5774 :
5775 20472 : btrfs_inherit_iflags(inode, dir);
5776 :
5777 20472 : if (S_ISREG(mode)) {
5778 13866 : if (btrfs_test_opt(root, NODATASUM))
5779 1 : BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
5780 13866 : if (btrfs_test_opt(root, NODATACOW))
5781 1 : BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW |
5782 : BTRFS_INODE_NODATASUM;
5783 : }
5784 :
5785 20472 : inode_tree_add(inode);
5786 :
5787 20472 : trace_btrfs_inode_new(inode);
5788 : btrfs_set_inode_last_trans(trans, inode);
5789 :
5790 20472 : btrfs_update_root_times(trans, root);
5791 :
5792 20472 : ret = btrfs_inode_inherit_props(trans, inode, dir);
5793 20472 : if (ret)
5794 0 : btrfs_err(root->fs_info,
5795 : "error inheriting props for ino %llu (root %llu): %d",
5796 : btrfs_ino(inode), root->root_key.objectid, ret);
5797 :
5798 20472 : return inode;
5799 :
5800 : fail_unlock:
5801 0 : unlock_new_inode(inode);
5802 : fail:
5803 0 : if (dir && name)
5804 0 : BTRFS_I(dir)->index_cnt--;
5805 0 : btrfs_free_path(path);
5806 0 : iput(inode);
5807 0 : return ERR_PTR(ret);
5808 : }
5809 :
5810 : static inline u8 btrfs_inode_type(struct inode *inode)
5811 : {
5812 26306 : return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
5813 : }
5814 :
5815 : /*
5816 : * utility function to add 'inode' into 'parent_inode' with
5817 : * a given name and a given sequence number.
5818 : * if 'add_backref' is true, also insert a backref from the
5819 : * inode to the parent directory.
5820 : */
5821 26306 : int btrfs_add_link(struct btrfs_trans_handle *trans,
5822 26306 : struct inode *parent_inode, struct inode *inode,
5823 : const char *name, int name_len, int add_backref, u64 index)
5824 : {
5825 : int ret = 0;
5826 : struct btrfs_key key;
5827 26306 : struct btrfs_root *root = BTRFS_I(parent_inode)->root;
5828 : u64 ino = btrfs_ino(inode);
5829 : u64 parent_ino = btrfs_ino(parent_inode);
5830 :
5831 26306 : if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
5832 1 : memcpy(&key, &BTRFS_I(inode)->root->root_key, sizeof(key));
5833 : } else {
5834 26305 : key.objectid = ino;
5835 : btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
5836 26305 : key.offset = 0;
5837 : }
5838 :
5839 26306 : if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
5840 1 : ret = btrfs_add_root_ref(trans, root->fs_info->tree_root,
5841 : key.objectid, root->root_key.objectid,
5842 : parent_ino, index, name, name_len);
5843 26305 : } else if (add_backref) {
5844 3573 : ret = btrfs_insert_inode_ref(trans, root, name, name_len, ino,
5845 : parent_ino, index);
5846 : }
5847 :
5848 : /* Nothing to clean up yet */
5849 26306 : if (ret)
5850 : return ret;
5851 :
5852 26306 : ret = btrfs_insert_dir_item(trans, root, name, name_len,
5853 : parent_inode, &key,
5854 : btrfs_inode_type(inode), index);
5855 26306 : if (ret == -EEXIST || ret == -EOVERFLOW)
5856 : goto fail_dir_item;
5857 26306 : else if (ret) {
5858 0 : btrfs_abort_transaction(trans, root, ret);
5859 0 : return ret;
5860 : }
5861 :
5862 52612 : btrfs_i_size_write(parent_inode, parent_inode->i_size +
5863 26306 : name_len * 2);
5864 : inode_inc_iversion(parent_inode);
5865 26306 : parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
5866 26306 : ret = btrfs_update_inode(trans, root, parent_inode);
5867 26306 : if (ret)
5868 0 : btrfs_abort_transaction(trans, root, ret);
5869 26306 : return ret;
5870 :
5871 : fail_dir_item:
5872 0 : if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
5873 : u64 local_index;
5874 : int err;
5875 0 : err = btrfs_del_root_ref(trans, root->fs_info->tree_root,
5876 : key.objectid, root->root_key.objectid,
5877 : parent_ino, &local_index, name, name_len);
5878 :
5879 0 : } else if (add_backref) {
5880 : u64 local_index;
5881 : int err;
5882 :
5883 0 : err = btrfs_del_inode_ref(trans, root, name, name_len,
5884 : ino, parent_ino, &local_index);
5885 : }
5886 0 : return ret;
5887 : }
5888 :
5889 : static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
5890 : struct inode *dir, struct dentry *dentry,
5891 : struct inode *inode, int backref, u64 index)
5892 : {
5893 21581 : int err = btrfs_add_link(trans, dir, inode,
5894 : dentry->d_name.name, dentry->d_name.len,
5895 : backref, index);
5896 21581 : if (err > 0)
5897 : err = -EEXIST;
5898 : return err;
5899 : }
5900 :
5901 4194 : static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
5902 : umode_t mode, dev_t rdev)
5903 : {
5904 : struct btrfs_trans_handle *trans;
5905 2097 : struct btrfs_root *root = BTRFS_I(dir)->root;
5906 : struct inode *inode = NULL;
5907 : int err;
5908 : int drop_inode = 0;
5909 : u64 objectid;
5910 2097 : u64 index = 0;
5911 :
5912 : if (!new_valid_dev(rdev))
5913 : return -EINVAL;
5914 :
5915 : /*
5916 : * 2 for inode item and ref
5917 : * 2 for dir items
5918 : * 1 for xattr if selinux is on
5919 : */
5920 2097 : trans = btrfs_start_transaction(root, 5);
5921 2097 : if (IS_ERR(trans))
5922 0 : return PTR_ERR(trans);
5923 :
5924 2097 : err = btrfs_find_free_ino(root, &objectid);
5925 2097 : if (err)
5926 : goto out_unlock;
5927 :
5928 4194 : inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
5929 2097 : dentry->d_name.len, btrfs_ino(dir), objectid,
5930 : mode, &index);
5931 2097 : if (IS_ERR(inode)) {
5932 0 : err = PTR_ERR(inode);
5933 0 : goto out_unlock;
5934 : }
5935 :
5936 : /*
5937 : * If the active LSM wants to access the inode during
5938 : * d_instantiate it needs these. Smack checks to see
5939 : * if the filesystem supports xattrs by looking at the
5940 : * ops vector.
5941 : */
5942 2097 : inode->i_op = &btrfs_special_inode_operations;
5943 2097 : init_special_inode(inode, inode->i_mode, rdev);
5944 :
5945 2097 : err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
5946 2097 : if (err)
5947 : goto out_unlock_inode;
5948 :
5949 2097 : err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
5950 2097 : if (err) {
5951 : goto out_unlock_inode;
5952 : } else {
5953 2097 : btrfs_update_inode(trans, root, inode);
5954 2097 : unlock_new_inode(inode);
5955 2097 : d_instantiate(dentry, inode);
5956 : }
5957 :
5958 : out_unlock:
5959 2097 : btrfs_end_transaction(trans, root);
5960 2097 : btrfs_balance_delayed_items(root);
5961 2097 : btrfs_btree_balance_dirty(root);
5962 2097 : if (drop_inode) {
5963 : inode_dec_link_count(inode);
5964 0 : iput(inode);
5965 : }
5966 2097 : return err;
5967 :
5968 : out_unlock_inode:
5969 : drop_inode = 1;
5970 0 : unlock_new_inode(inode);
5971 0 : goto out_unlock;
5972 :
5973 : }
5974 :
5975 27728 : static int btrfs_create(struct inode *dir, struct dentry *dentry,
5976 : umode_t mode, bool excl)
5977 : {
5978 : struct btrfs_trans_handle *trans;
5979 13864 : struct btrfs_root *root = BTRFS_I(dir)->root;
5980 : struct inode *inode = NULL;
5981 : int drop_inode_on_err = 0;
5982 : int err;
5983 : u64 objectid;
5984 13864 : u64 index = 0;
5985 :
5986 : /*
5987 : * 2 for inode item and ref
5988 : * 2 for dir items
5989 : * 1 for xattr if selinux is on
5990 : */
5991 13864 : trans = btrfs_start_transaction(root, 5);
5992 13864 : if (IS_ERR(trans))
5993 0 : return PTR_ERR(trans);
5994 :
5995 13864 : err = btrfs_find_free_ino(root, &objectid);
5996 13864 : if (err)
5997 : goto out_unlock;
5998 :
5999 27728 : inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
6000 13864 : dentry->d_name.len, btrfs_ino(dir), objectid,
6001 : mode, &index);
6002 13864 : if (IS_ERR(inode)) {
6003 0 : err = PTR_ERR(inode);
6004 0 : goto out_unlock;
6005 : }
6006 : drop_inode_on_err = 1;
6007 : /*
6008 : * If the active LSM wants to access the inode during
6009 : * d_instantiate it needs these. Smack checks to see
6010 : * if the filesystem supports xattrs by looking at the
6011 : * ops vector.
6012 : */
6013 13864 : inode->i_fop = &btrfs_file_operations;
6014 13864 : inode->i_op = &btrfs_file_inode_operations;
6015 13864 : inode->i_mapping->a_ops = &btrfs_aops;
6016 13864 : inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
6017 :
6018 13864 : err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
6019 13864 : if (err)
6020 : goto out_unlock_inode;
6021 :
6022 13864 : err = btrfs_update_inode(trans, root, inode);
6023 13864 : if (err)
6024 : goto out_unlock_inode;
6025 :
6026 13864 : err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
6027 13864 : if (err)
6028 : goto out_unlock_inode;
6029 :
6030 13864 : BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
6031 13864 : unlock_new_inode(inode);
6032 13864 : d_instantiate(dentry, inode);
6033 :
6034 : out_unlock:
6035 13864 : btrfs_end_transaction(trans, root);
6036 13864 : if (err && drop_inode_on_err) {
6037 : inode_dec_link_count(inode);
6038 0 : iput(inode);
6039 : }
6040 13864 : btrfs_balance_delayed_items(root);
6041 13864 : btrfs_btree_balance_dirty(root);
6042 13864 : return err;
6043 :
6044 : out_unlock_inode:
6045 0 : unlock_new_inode(inode);
6046 0 : goto out_unlock;
6047 :
6048 : }
6049 :
6050 3573 : static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
6051 3573 : struct dentry *dentry)
6052 : {
6053 : struct btrfs_trans_handle *trans;
6054 3573 : struct btrfs_root *root = BTRFS_I(dir)->root;
6055 3573 : struct inode *inode = old_dentry->d_inode;
6056 : u64 index;
6057 : int err;
6058 : int drop_inode = 0;
6059 :
6060 : /* do not allow sys_link's with other subvols of the same device */
6061 3573 : if (root->objectid != BTRFS_I(inode)->root->objectid)
6062 : return -EXDEV;
6063 :
6064 3573 : if (inode->i_nlink >= BTRFS_LINK_MAX)
6065 : return -EMLINK;
6066 :
6067 3573 : err = btrfs_set_inode_index(dir, &index);
6068 3573 : if (err)
6069 : goto fail;
6070 :
6071 : /*
6072 : * 2 items for inode and inode ref
6073 : * 2 items for dir items
6074 : * 1 item for parent inode
6075 : */
6076 3573 : trans = btrfs_start_transaction(root, 5);
6077 3573 : if (IS_ERR(trans)) {
6078 0 : err = PTR_ERR(trans);
6079 0 : goto fail;
6080 : }
6081 :
6082 : /* There are several dir indexes for this inode, clear the cache. */
6083 3573 : BTRFS_I(inode)->dir_index = 0ULL;
6084 3573 : inc_nlink(inode);
6085 : inode_inc_iversion(inode);
6086 3573 : inode->i_ctime = CURRENT_TIME;
6087 3573 : ihold(inode);
6088 : set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags);
6089 :
6090 3573 : err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index);
6091 :
6092 3573 : if (err) {
6093 : drop_inode = 1;
6094 : } else {
6095 3573 : struct dentry *parent = dentry->d_parent;
6096 3573 : err = btrfs_update_inode(trans, root, inode);
6097 3573 : if (err)
6098 : goto fail;
6099 3573 : if (inode->i_nlink == 1) {
6100 : /*
6101 : * If new hard link count is 1, it's a file created
6102 : * with open(2) O_TMPFILE flag.
6103 : */
6104 1 : err = btrfs_orphan_del(trans, inode);
6105 1 : if (err)
6106 : goto fail;
6107 : }
6108 3573 : d_instantiate(dentry, inode);
6109 3573 : btrfs_log_new_name(trans, inode, NULL, parent);
6110 : }
6111 :
6112 3573 : btrfs_end_transaction(trans, root);
6113 3573 : btrfs_balance_delayed_items(root);
6114 : fail:
6115 3573 : if (drop_inode) {
6116 : inode_dec_link_count(inode);
6117 0 : iput(inode);
6118 : }
6119 3573 : btrfs_btree_balance_dirty(root);
6120 3573 : return err;
6121 : }
6122 :
6123 2413 : static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
6124 : {
6125 : struct inode *inode = NULL;
6126 : struct btrfs_trans_handle *trans;
6127 2413 : struct btrfs_root *root = BTRFS_I(dir)->root;
6128 : int err = 0;
6129 : int drop_on_err = 0;
6130 2413 : u64 objectid = 0;
6131 2413 : u64 index = 0;
6132 :
6133 : /*
6134 : * 2 items for inode and ref
6135 : * 2 items for dir items
6136 : * 1 for xattr if selinux is on
6137 : */
6138 2413 : trans = btrfs_start_transaction(root, 5);
6139 2413 : if (IS_ERR(trans))
6140 0 : return PTR_ERR(trans);
6141 :
6142 2413 : err = btrfs_find_free_ino(root, &objectid);
6143 2413 : if (err)
6144 : goto out_fail;
6145 :
6146 4826 : inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
6147 2413 : dentry->d_name.len, btrfs_ino(dir), objectid,
6148 : S_IFDIR | mode, &index);
6149 2413 : if (IS_ERR(inode)) {
6150 0 : err = PTR_ERR(inode);
6151 0 : goto out_fail;
6152 : }
6153 :
6154 : drop_on_err = 1;
6155 : /* these must be set before we unlock the inode */
6156 2413 : inode->i_op = &btrfs_dir_inode_operations;
6157 2413 : inode->i_fop = &btrfs_dir_file_operations;
6158 :
6159 2413 : err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
6160 2413 : if (err)
6161 : goto out_fail_inode;
6162 :
6163 : btrfs_i_size_write(inode, 0);
6164 2413 : err = btrfs_update_inode(trans, root, inode);
6165 2413 : if (err)
6166 : goto out_fail_inode;
6167 :
6168 4826 : err = btrfs_add_link(trans, dir, inode, dentry->d_name.name,
6169 2413 : dentry->d_name.len, 0, index);
6170 2413 : if (err)
6171 : goto out_fail_inode;
6172 :
6173 2413 : d_instantiate(dentry, inode);
6174 : /*
6175 : * mkdir is special. We're unlocking after we call d_instantiate
6176 : * to avoid a race with nfsd calling d_instantiate.
6177 : */
6178 2413 : unlock_new_inode(inode);
6179 : drop_on_err = 0;
6180 :
6181 : out_fail:
6182 2413 : btrfs_end_transaction(trans, root);
6183 2413 : if (drop_on_err)
6184 0 : iput(inode);
6185 2413 : btrfs_balance_delayed_items(root);
6186 2413 : btrfs_btree_balance_dirty(root);
6187 2413 : return err;
6188 :
6189 : out_fail_inode:
6190 0 : unlock_new_inode(inode);
6191 0 : goto out_fail;
6192 : }
6193 :
6194 : /* helper for btrfs_get_extent. Given an existing extent in the tree,
6195 : * and an extent that you want to insert, deal with overlap and insert
6196 : * the new extent into the tree.
6197 : */
6198 123 : static int merge_extent_mapping(struct extent_map_tree *em_tree,
6199 : struct extent_map *existing,
6200 123 : struct extent_map *em,
6201 : u64 map_start)
6202 : {
6203 : u64 start_diff;
6204 :
6205 246 : BUG_ON(map_start < em->start || map_start >= extent_map_end(em));
6206 123 : start_diff = map_start - em->start;
6207 123 : em->start = map_start;
6208 123 : em->len = existing->start - em->start;
6209 123 : if (em->block_start < EXTENT_MAP_LAST_BYTE &&
6210 : !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
6211 0 : em->block_start += start_diff;
6212 0 : em->block_len -= start_diff;
6213 : }
6214 123 : return add_extent_mapping(em_tree, em, 0);
6215 : }
6216 :
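 : /*
 :  * Worked example with hypothetical numbers: if em spans [0, 16K), an
 :  * existing map starts at 8K, and map_start is 4K, then start_diff is
 :  * 4K, em->start becomes 4K and em->len becomes existing->start -
 :  * em->start = 4K, i.e. em is trimmed to [4K, 8K), with block_start
 :  * shifted by the same 4K for uncompressed, non-inline extents.
 :  */
 :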
6217 0 : static noinline int uncompress_inline(struct btrfs_path *path,
6218 : struct inode *inode, struct page *page,
6219 : size_t pg_offset, u64 extent_offset,
6220 : struct btrfs_file_extent_item *item)
6221 : {
6222 : int ret;
6223 0 : struct extent_buffer *leaf = path->nodes[0];
6224 : char *tmp;
6225 : size_t max_size;
6226 : unsigned long inline_size;
6227 : unsigned long ptr;
6228 : int compress_type;
6229 :
6230 0 : WARN_ON(pg_offset != 0);
6231 0 : compress_type = btrfs_file_extent_compression(leaf, item);
6232 : max_size = btrfs_file_extent_ram_bytes(leaf, item);
6233 0 : inline_size = btrfs_file_extent_inline_item_len(leaf,
6234 : btrfs_item_nr(path->slots[0]));
6235 : tmp = kmalloc(inline_size, GFP_NOFS);
6236 0 : if (!tmp)
6237 : return -ENOMEM;
6238 : ptr = btrfs_file_extent_inline_start(item);
6239 :
6240 0 : read_extent_buffer(leaf, tmp, ptr, inline_size);
6241 :
6242 0 : max_size = min_t(unsigned long, PAGE_CACHE_SIZE, max_size);
6243 0 : ret = btrfs_decompress(compress_type, tmp, page,
6244 : extent_offset, inline_size, max_size);
6245 0 : kfree(tmp);
6246 : return ret;
6247 : }
6248 :
6249 : /*
6250 : * a bit scary, this does extent mapping from logical file offset to the disk.
6251 : * the ugly parts come from merging extents from the disk with the in-ram
6252 : * representation. This gets more complex because of the data=ordered code,
6253 : * where the in-ram extents might be locked pending data=ordered completion.
6254 : *
6255 : * This also copies inline extents directly into the page.
6256 : */
6257 :
6258 1605869 : struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
6259 : size_t pg_offset, u64 start, u64 len,
6260 : int create)
6261 : {
6262 : int ret;
6263 : int err = 0;
6264 : u64 extent_start = 0;
6265 : u64 extent_end = 0;
6266 : u64 objectid = btrfs_ino(inode);
6267 : u32 found_type;
6268 0 : struct btrfs_path *path = NULL;
6269 1605417 : struct btrfs_root *root = BTRFS_I(inode)->root;
6270 : struct btrfs_file_extent_item *item;
6271 9778 : struct extent_buffer *leaf;
6272 : struct btrfs_key found_key;
6273 40793 : struct extent_map *em = NULL;
6274 1605417 : struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
6275 1605417 : struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
6276 : struct btrfs_trans_handle *trans = NULL;
6277 1605417 : const bool new_inline = !page || create;
6278 :
6279 : again:
6280 1605417 : read_lock(&em_tree->lock);
6281 1605436 : em = lookup_extent_mapping(em_tree, start, len);
6282 1605428 : if (em)
6283 1565195 : em->bdev = root->fs_info->fs_devices->latest_bdev;
6284 : read_unlock(&em_tree->lock);
6285 :
6286 1605435 : if (em) {
6287 1565203 : if (em->start > start || em->start + em->len <= start)
6288 153 : free_extent_map(em);
6289 1565050 : else if (em->block_start == EXTENT_MAP_INLINE && page)
6290 2 : free_extent_map(em);
6291 : else
6292 : goto out;
6293 : }
6294 40388 : em = alloc_extent_map();
6295 40385 : if (!em) {
6296 : err = -ENOMEM;
6297 : goto out;
6298 : }
6299 40385 : em->bdev = root->fs_info->fs_devices->latest_bdev;
6300 40385 : em->start = EXTENT_MAP_HOLE;
6301 40385 : em->orig_start = EXTENT_MAP_HOLE;
6302 40385 : em->len = (u64)-1;
6303 40385 : em->block_len = (u64)-1;
6304 :
6305 : if (!path) {
6306 40385 : path = btrfs_alloc_path();
6307 40385 : if (!path) {
6308 : err = -ENOMEM;
6309 : goto out;
6310 : }
6311 : /*
6312 : * Chances are we'll be called again, so go ahead and do
6313 : * readahead
6314 : */
6315 40386 : path->reada = 1;
6316 : }
6317 :
6318 40386 : ret = btrfs_lookup_file_extent(trans, root, path,
6319 : objectid, start, trans != NULL);
6320 40386 : if (ret < 0) {
6321 : err = ret;
6322 : goto out;
6323 : }
6324 :
6325 40387 : if (ret != 0) {
6326 9638 : if (path->slots[0] == 0)
6327 : goto not_found;
6328 9644 : path->slots[0]--;
6329 : }
6330 :
6331 40393 : leaf = path->nodes[0];
6332 80778 : item = btrfs_item_ptr(leaf, path->slots[0],
6333 : struct btrfs_file_extent_item);
6334 : /* are we inside the extent that was found? */
6335 40385 : btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
6336 40388 : found_type = btrfs_key_type(&found_key);
6337 40388 : if (found_key.objectid != objectid ||
6338 : found_type != BTRFS_EXTENT_DATA_KEY) {
6339 : /*
6340 : * If we back up past the first extent we want to move forward
6341 : * and see if there is an extent in front of us, otherwise we'll
6342 : * say there is a hole for our whole search range which can
6343 : * cause problems.
6344 : */
6345 : extent_end = start;
6346 : goto next;
6347 : }
6348 :
6349 38441 : found_type = btrfs_file_extent_type(leaf, item);
6350 38441 : extent_start = found_key.offset;
6351 38441 : if (found_type == BTRFS_FILE_EXTENT_REG ||
6352 : found_type == BTRFS_FILE_EXTENT_PREALLOC) {
6353 37945 : extent_end = extent_start +
6354 : btrfs_file_extent_num_bytes(leaf, item);
6355 491 : } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
6356 : size_t size;
6357 491 : size = btrfs_file_extent_inline_len(leaf, path->slots[0], item);
6358 491 : extent_end = ALIGN(extent_start + size, root->sectorsize);
6359 : }
6360 : next:
6361 40527 : if (start >= extent_end) {
6362 9778 : path->slots[0]++;
6363 19556 : if (path->slots[0] >= btrfs_header_nritems(leaf)) {
6364 5247 : ret = btrfs_next_leaf(root, path);
6365 5248 : if (ret < 0) {
6366 : err = ret;
6367 : goto out;
6368 : }
6369 5247 : if (ret > 0)
6370 : goto not_found;
6371 244 : leaf = path->nodes[0];
6372 : }
6373 4775 : btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
6374 4932 : if (found_key.objectid != objectid ||
6375 156 : found_key.type != BTRFS_EXTENT_DATA_KEY)
6376 : goto not_found;
6377 156 : if (start + len <= found_key.offset)
6378 : goto not_found;
6379 155 : if (start > found_key.offset)
6380 : goto next;
6381 12 : em->start = start;
6382 12 : em->orig_start = start;
6383 12 : em->len = found_key.offset - start;
6384 12 : goto not_found_em;
6385 : }
6386 :
6387 30749 : btrfs_extent_item_to_extent_map(inode, path, item, new_inline, em);
6388 :
6389 30749 : if (found_type == BTRFS_FILE_EXTENT_REG ||
6390 : found_type == BTRFS_FILE_EXTENT_PREALLOC) {
6391 : goto insert;
6392 491 : } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
6393 : unsigned long ptr;
6394 : char *map;
6395 : size_t size;
6396 : size_t extent_offset;
6397 : size_t copy_size;
6398 :
6399 491 : if (new_inline)
6400 : goto out;
6401 :
6402 452 : size = btrfs_file_extent_inline_len(leaf, path->slots[0], item);
6403 452 : extent_offset = page_offset(page) + pg_offset - extent_start;
6404 452 : copy_size = min_t(u64, PAGE_CACHE_SIZE - pg_offset,
6405 : size - extent_offset);
6406 452 : em->start = extent_start + extent_offset;
6407 452 : em->len = ALIGN(copy_size, root->sectorsize);
6408 452 : em->orig_block_len = em->len;
6409 452 : em->orig_start = em->start;
6410 452 : ptr = btrfs_file_extent_inline_start(item) + extent_offset;
6411 904 : if (create == 0 && !PageUptodate(page)) {
6412 452 : if (btrfs_file_extent_compression(leaf, item) !=
6413 : BTRFS_COMPRESS_NONE) {
6414 0 : ret = uncompress_inline(path, inode, page,
6415 : pg_offset,
6416 : extent_offset, item);
6417 0 : if (ret) {
6418 : err = ret;
6419 : goto out;
6420 : }
6421 : } else {
6422 : map = kmap(page);
6423 452 : read_extent_buffer(leaf, map + pg_offset, ptr,
6424 : copy_size);
6425 452 : if (pg_offset + copy_size < PAGE_CACHE_SIZE) {
6426 452 : memset(map + pg_offset + copy_size, 0,
6427 452 : PAGE_CACHE_SIZE - pg_offset -
6428 : copy_size);
6429 : }
6430 : kunmap(page);
6431 : }
6432 : flush_dcache_page(page);
6433 0 : } else if (create && PageUptodate(page)) {
6434 0 : BUG();
6435 : if (!trans) {
6436 : kunmap(page);
6437 : free_extent_map(em);
6438 : em = NULL;
6439 :
6440 : btrfs_release_path(path);
6441 : trans = btrfs_join_transaction(root);
6442 :
6443 : if (IS_ERR(trans))
6444 : return ERR_CAST(trans);
6445 : goto again;
6446 : }
6447 : map = kmap(page);
6448 : write_extent_buffer(leaf, map + pg_offset, ptr,
6449 : copy_size);
6450 : kunmap(page);
6451 : btrfs_mark_buffer_dirty(leaf);
6452 : }
6453 452 : set_extent_uptodate(io_tree, em->start,
6454 : extent_map_end(em) - 1, NULL, GFP_NOFS);
6455 452 : goto insert;
6456 : }
6457 : not_found:
6458 9618 : em->start = start;
6459 9618 : em->orig_start = start;
6460 9618 : em->len = len;
6461 : not_found_em:
6462 9630 : em->block_start = EXTENT_MAP_HOLE;
6463 : set_bit(EXTENT_FLAG_VACANCY, &em->flags);
6464 : insert:
6465 40350 : btrfs_release_path(path);
6466 80685 : if (em->start > start || extent_map_end(em) <= start) {
6467 3 : btrfs_err(root->fs_info, "bad extent! em: [%llu %llu] passed [%llu %llu]",
6468 : em->start, em->len, start, len);
6469 : err = -EIO;
6470 0 : goto out;
6471 : }
6472 :
6473 : err = 0;
6474 40341 : write_lock(&em_tree->lock);
6475 40350 : ret = add_extent_mapping(em_tree, em, 0);
6476 : /* it is possible that someone inserted the extent into the tree
6477 : * while we had the lock dropped. It is also possible that
6478 : * an overlapping map exists in the tree
6479 : */
6480 40347 : if (ret == -EEXIST) {
6481 : struct extent_map *existing;
6482 :
6483 : ret = 0;
6484 :
6485 125 : existing = lookup_extent_mapping(em_tree, start, len);
6486 127 : if (existing && (existing->start > start ||
6487 2 : existing->start + existing->len <= start)) {
6488 123 : free_extent_map(existing);
6489 : existing = NULL;
6490 : }
6491 125 : if (!existing) {
6492 123 : existing = lookup_extent_mapping(em_tree, em->start,
6493 : em->len);
6494 123 : if (existing) {
6495 123 : err = merge_extent_mapping(em_tree, existing,
6496 : em, start);
6497 123 : free_extent_map(existing);
6498 123 : if (err) {
6499 0 : free_extent_map(em);
6500 : em = NULL;
6501 : }
6502 : } else {
6503 : err = -EIO;
6504 0 : free_extent_map(em);
6505 : em = NULL;
6506 : }
6507 : } else {
6508 2 : free_extent_map(em);
6509 : em = existing;
6510 : err = 0;
6511 : }
6512 : }
6513 : write_unlock(&em_tree->lock);
6514 : out:
6515 :
6516 1605432 : trace_btrfs_get_extent(root, em);
6517 :
6518 1605429 : if (path)
6519 40384 : btrfs_free_path(path);
6520 : if (trans) {
6521 : ret = btrfs_end_transaction(trans, root);
6522 : if (!err)
6523 : err = ret;
6524 : }
6525 1605428 : if (err) {
6526 0 : free_extent_map(em);
6527 0 : return ERR_PTR(err);
6528 : }
6529 1605428 : BUG_ON(!em); /* Error is always set */
6530 : return em;
6531 : }
6532 :
6533 1891 : struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
6534 : size_t pg_offset, u64 start, u64 len,
6535 : int create)
6536 : {
6537 : struct extent_map *em;
6538 73 : struct extent_map *hole_em = NULL;
6539 1891 : u64 range_start = start;
6540 : u64 end;
6541 : u64 found;
6542 : u64 found_end;
6543 : int err = 0;
6544 :
6545 1891 : em = btrfs_get_extent(inode, page, pg_offset, start, len, create);
6546 1891 : if (IS_ERR(em))
6547 : return em;
6548 1891 : if (em) {
6549 : /*
6550 : * if our em maps to
6551 : * - a hole or
6552 : * - a pre-alloc extent,
6553 : * there might actually be delalloc bytes behind it.
6554 : */
6555 3279 : if (em->block_start != EXTENT_MAP_HOLE &&
6556 : !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
6557 : return em;
6558 : else
6559 : hole_em = em;
6560 : }
6561 :
6562 : /* check to see if we've wrapped (len == -1 or similar) */
6563 688 : end = start + len;
6564 688 : if (end < start)
6565 : end = (u64)-1;
6566 : else
6567 688 : end -= 1;
6568 :
6569 : em = NULL;
6570 :
6571 : /* ok, we didn't find anything, lets look for delalloc */
6572 688 : found = count_range_bits(&BTRFS_I(inode)->io_tree, &range_start,
6573 : end, len, EXTENT_DELALLOC, 1);
6574 688 : found_end = range_start + found;
6575 688 : if (found_end < range_start)
6576 : found_end = (u64)-1;
6577 :
6578 : /*
6579 : * we didn't find anything useful, return
6580 : * the original results from get_extent()
6581 : */
6582 688 : if (range_start > end || found_end <= start) {
6583 : em = hole_em;
6584 : hole_em = NULL;
6585 : goto out;
6586 : }
6587 :
6588 : /* adjust the range_start to make sure it doesn't
6589 : * go backwards from the start they passed in
6590 : */
6591 73 : range_start = max(start, range_start);
6592 73 : found = found_end - range_start;
6593 :
6594 73 : if (found > 0) {
6595 : u64 hole_start = start;
6596 : u64 hole_len = len;
6597 :
6598 73 : em = alloc_extent_map();
6599 73 : if (!em) {
6600 : err = -ENOMEM;
6601 : goto out;
6602 : }
6603 : /*
6604 : * when btrfs_get_extent can't find anything it
6605 : * returns one huge hole
6606 : *
6607 : * make sure what it found really fits our range, and
6608 : * adjust to make sure it is based on the start from
6609 : * the caller
6610 : */
6611 73 : if (hole_em) {
6612 : u64 calc_end = extent_map_end(hole_em);
6613 :
6614 73 : if (calc_end <= start || (hole_em->start > end)) {
6615 0 : free_extent_map(hole_em);
6616 0 : hole_em = NULL;
6617 : } else {
6618 73 : hole_start = max(hole_em->start, start);
6619 73 : hole_len = calc_end - hole_start;
6620 : }
6621 : }
6622 73 : em->bdev = NULL;
6623 73 : if (hole_em && range_start > hole_start) {
6624 : /* our hole starts before our delalloc, so we
6625 : * have to return just the parts of the hole
6626 : * that go until the delalloc starts
6627 : */
6628 26 : em->len = min(hole_len,
6629 : range_start - hole_start);
6630 26 : em->start = hole_start;
6631 26 : em->orig_start = hole_start;
6632 : /*
6633 : * don't adjust block start at all,
6634 : * it is fixed at EXTENT_MAP_HOLE
6635 : */
6636 26 : em->block_start = hole_em->block_start;
6637 26 : em->block_len = hole_len;
6638 26 : if (test_bit(EXTENT_FLAG_PREALLOC, &hole_em->flags))
6639 : set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
6640 : } else {
6641 47 : em->start = range_start;
6642 47 : em->len = found;
6643 47 : em->orig_start = range_start;
6644 47 : em->block_start = EXTENT_MAP_DELALLOC;
6645 47 : em->block_len = found;
6646 : }
6647 0 : } else if (hole_em) {
6648 : return hole_em;
6649 : }
6650 : out:
6651 :
6652 688 : free_extent_map(hole_em);
6653 688 : if (err) {
6654 0 : free_extent_map(em);
6655 0 : return ERR_PTR(err);
6656 : }
6657 : return em;
6658 : }
6659 :
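 : /*
 :  * Worked example with a hypothetical layout: given a hole at [0, 16K)
 :  * and delalloc bytes pending at [4K, 8K), a lookup over [0, 16K) first
 :  * returns a 4K hole map at [0, 4K) (the hole-before-delalloc branch),
 :  * and a follow-up lookup at 4K returns an EXTENT_MAP_DELALLOC map of
 :  * length 4K covering [4K, 8K).
 :  */
 :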
6660 25259 : static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
6661 : u64 start, u64 len)
6662 : {
6663 25259 : struct btrfs_root *root = BTRFS_I(inode)->root;
6664 : struct extent_map *em;
6665 : struct btrfs_key ins;
6666 : u64 alloc_hint;
6667 : int ret;
6668 :
6669 25259 : alloc_hint = get_extent_allocation_hint(inode, start, len);
6670 25260 : ret = btrfs_reserve_extent(root, len, root->sectorsize, 0,
6671 : alloc_hint, &ins, 1, 1);
6672 25259 : if (ret)
6673 0 : return ERR_PTR(ret);
6674 :
6675 25259 : em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
6676 : ins.offset, ins.offset, ins.offset, 0);
6677 25254 : if (IS_ERR(em)) {
6678 0 : btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
6679 0 : return em;
6680 : }
6681 :
6682 25254 : ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
6683 : ins.offset, ins.offset, 0);
6684 25259 : if (ret) {
6685 0 : btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
6686 0 : free_extent_map(em);
6687 0 : return ERR_PTR(ret);
6688 : }
6689 :
6690 : return em;
6691 : }
6692 :
6693 : /*
6694 : * returns 1 when the nocow is safe, < 0 on error, 0 if the
6695 : * block must be cow'd
6696 : */
6697 0 : noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
6698 : u64 *orig_start, u64 *orig_block_len,
6699 : u64 *ram_bytes)
6700 : {
6701 : struct btrfs_trans_handle *trans;
6702 : struct btrfs_path *path;
6703 : int ret;
6704 : struct extent_buffer *leaf;
6705 0 : struct btrfs_root *root = BTRFS_I(inode)->root;
6706 0 : struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
6707 : struct btrfs_file_extent_item *fi;
6708 : struct btrfs_key key;
6709 : u64 disk_bytenr;
6710 : u64 backref_offset;
6711 : u64 extent_end;
6712 : u64 num_bytes;
6713 : int slot;
6714 : int found_type;
6715 0 : bool nocow = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW);
6716 :
6717 0 : path = btrfs_alloc_path();
6718 0 : if (!path)
6719 : return -ENOMEM;
6720 :
6721 0 : ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(inode),
6722 : offset, 0);
6723 0 : if (ret < 0)
6724 : goto out;
6725 :
6726 0 : slot = path->slots[0];
6727 0 : if (ret == 1) {
6728 0 : if (slot == 0) {
6729 : /* can't find the item, must cow */
6730 : ret = 0;
6731 : goto out;
6732 : }
6733 0 : slot--;
6734 : }
6735 : ret = 0;
6736 0 : leaf = path->nodes[0];
6737 0 : btrfs_item_key_to_cpu(leaf, &key, slot);
6738 0 : if (key.objectid != btrfs_ino(inode) ||
6739 0 : key.type != BTRFS_EXTENT_DATA_KEY) {
6740 : /* not our file or wrong item type, must cow */
6741 : goto out;
6742 : }
6743 :
6744 0 : if (key.offset > offset) {
6745 : /* Wrong offset, must cow */
6746 : goto out;
6747 : }
6748 :
6749 0 : fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
6750 0 : found_type = btrfs_file_extent_type(leaf, fi);
6751 0 : if (found_type != BTRFS_FILE_EXTENT_REG &&
6752 : found_type != BTRFS_FILE_EXTENT_PREALLOC) {
6753 : /* not a regular extent, must cow */
6754 : goto out;
6755 : }
6756 :
6757 0 : if (!nocow && found_type == BTRFS_FILE_EXTENT_REG)
6758 : goto out;
6759 :
6760 0 : extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
6761 0 : if (extent_end <= offset)
6762 : goto out;
6763 :
6764 : disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6765 0 : if (disk_bytenr == 0)
6766 : goto out;
6767 :
6768 0 : if (btrfs_file_extent_compression(leaf, fi) ||
6769 0 : btrfs_file_extent_encryption(leaf, fi) ||
6770 : btrfs_file_extent_other_encoding(leaf, fi))
6771 : goto out;
6772 :
6773 : backref_offset = btrfs_file_extent_offset(leaf, fi);
6774 :
6775 0 : if (orig_start) {
6776 0 : *orig_start = key.offset - backref_offset;
6777 0 : *orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi);
6778 0 : *ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
6779 : }
6780 :
6781 0 : if (btrfs_extent_readonly(root, disk_bytenr))
6782 : goto out;
6783 :
6784 0 : num_bytes = min(offset + *len, extent_end) - offset;
6785 0 : if (!nocow && found_type == BTRFS_FILE_EXTENT_PREALLOC) {
6786 : u64 range_end;
6787 :
6788 0 : range_end = round_up(offset + num_bytes, root->sectorsize) - 1;
6789 0 : ret = test_range_bit(io_tree, offset, range_end,
6790 : EXTENT_DELALLOC, 0, NULL);
6791 0 : if (ret) {
6792 : ret = -EAGAIN;
6793 : goto out;
6794 : }
6795 : }
6796 :
6797 0 : btrfs_release_path(path);
6798 :
6799 : /*
6800 : * look for other files referencing this extent, if we
6801 : * find any we must cow
6802 : */
6803 0 : trans = btrfs_join_transaction(root);
6804 0 : if (IS_ERR(trans)) {
6805 : ret = 0;
6806 : goto out;
6807 : }
6808 :
6809 0 : ret = btrfs_cross_ref_exist(trans, root, btrfs_ino(inode),
6810 0 : key.offset - backref_offset, disk_bytenr);
6811 0 : btrfs_end_transaction(trans, root);
6812 0 : if (ret) {
6813 : ret = 0;
6814 : goto out;
6815 : }
6816 :
6817 : /*
6818 : * adjust disk_bytenr and num_bytes to cover just the bytes
6819 : * in this extent we are about to write. If there
6820 : * are any csums in that range we have to cow in order
6821 : * to keep the csums correct
6822 : */
6823 0 : disk_bytenr += backref_offset;
6824 0 : disk_bytenr += offset - key.offset;
6825 0 : if (csum_exist_in_range(root, disk_bytenr, num_bytes))
6826 : goto out;
6827 : /*
6828 : * all of the above have passed, it is safe to overwrite this extent
6829 : * without cow
6830 : */
6831 0 : *len = num_bytes;
6832 : ret = 1;
6833 : out:
6834 0 : btrfs_free_path(path);
6835 0 : return ret;
6836 : }
6837 :
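 : /*
 :  * Caller-side sketch (assumed names): the direct-IO write path probes
 :  * whether it may overwrite in place; 1 means nocow is safe for the
 :  * (possibly shrunken) *len bytes, anything else means allocate and cow:
 :  *
 :  *	u64 len = max_write, orig_start, orig_block_len, ram_bytes;
 :  *	int ret;
 :  *
 :  *	ret = can_nocow_extent(inode, start, &len, &orig_start,
 :  *			       &orig_block_len, &ram_bytes);
 :  *	if (ret != 1)
 :  *		em = btrfs_new_extent_direct(inode, start, len);
 :  */
 :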
6838 25365 : bool btrfs_page_exists_in_range(struct inode *inode, loff_t start, loff_t end)
6839 : {
6840 25365 : struct radix_tree_root *root = &inode->i_mapping->page_tree;
6841 : int found = false;
6842 25365 : void **pagep = NULL;
6843 : struct page *page = NULL;
6844 : int start_idx;
6845 : int end_idx;
6846 :
6847 25365 : start_idx = start >> PAGE_CACHE_SHIFT;
6848 :
6849 : /*
6850 : * end is the last byte in the last page. end == start is legal
6851 : */
6852 25365 : end_idx = end >> PAGE_CACHE_SHIFT;
6853 :
6854 : rcu_read_lock();
6855 :
6856 : /* Most of the code in this while loop is lifted from
6857 : * find_get_page. It's been modified to begin searching from a
6858 : * page and return just the first page found in that range. If the
6859 : * found idx is less than or equal to the end idx then we know that
6860 : * a page exists. If no pages are found or if those pages are
6861 : * outside of the range then we're fine (yay!) */
6862 50764 : while (page == NULL &&
6863 25364 : radix_tree_gang_lookup_slot(root, &pagep, NULL, start_idx, 1)) {
6864 34 : page = radix_tree_deref_slot(pagep);
6865 34 : if (unlikely(!page))
6866 : break;
6867 :
6868 34 : if (radix_tree_exception(page)) {
6869 0 : if (radix_tree_deref_retry(page)) {
6870 : page = NULL;
6871 0 : continue;
6872 : }
6873 : /*
6874 : * Otherwise, shmem/tmpfs must be storing a swap entry
6875 : * here as an exceptional entry: so return it without
6876 : * attempting to raise page count.
6877 : */
6878 : page = NULL;
6879 : break; /* TODO: Is this relevant for this use case? */
6880 : }
6881 :
6882 34 : if (!page_cache_get_speculative(page)) {
6883 : page = NULL;
6884 0 : continue;
6885 : }
6886 :
6887 : /*
6888 : * Has the page moved?
6889 : * This is part of the lockless pagecache protocol. See
6890 : * include/linux/pagemap.h for details.
6891 : */
6892 34 : if (unlikely(page != *pagep)) {
6893 0 : page_cache_release(page);
6894 : page = NULL;
6895 : }
6896 : }
6897 :
6898 25366 : if (page) {
6899 34 : if (page->index <= end_idx)
6900 : found = true;
6901 34 : page_cache_release(page);
6902 : }
6903 :
6904 : rcu_read_unlock();
6905 25365 : return found;
6906 : }
6907 :
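     :	/*
     :	 * Lock [lockstart, lockend] in the io tree for DIO, looping until the
     :	 * range has neither ordered extents nor cached pages: wait out any
     :	 * ordered extent, or flush and invalidate the page cache, then retry.
     :	 * A nonzero return means invalidation failed and the caller should
     :	 * fall back to buffered IO.
     :	 */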
6908 25259 : static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
6909 : struct extent_state **cached_state, int writing)
6910 : {
6911 : struct btrfs_ordered_extent *ordered;
6912 : int ret = 0;
6913 :
6914 : while (1) {
6915 25259 : lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
6916 : 0, cached_state);
6917 : /*
6918 : * We're concerned with the entire range that we're going to be
6919 :		 * doing DIO to, so we need to make sure there are no ordered
6920 :		 * extents in this range.
6921 : */
6922 25257 : ordered = btrfs_lookup_ordered_range(inode, lockstart,
6923 25257 : lockend - lockstart + 1);
6924 :
6925 : /*
6926 : * We need to make sure there are no buffered pages in this
6927 :		 * range either; we could have raced between the invalidate in
6928 : * generic_file_direct_write and locking the extent. The
6929 : * invalidate needs to happen so that reads after a write do not
6930 : * get stale data.
6931 : */
6932 25257 : if (!ordered &&
6933 25252 : (!writing ||
6934 25256 : !btrfs_page_exists_in_range(inode, lockstart, lockend)))
6935 : break;
6936 :
6937 0 : unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
6938 : cached_state, GFP_NOFS);
6939 :
6940 0 : if (ordered) {
6941 0 : btrfs_start_ordered_extent(inode, ordered, 1);
6942 0 : btrfs_put_ordered_extent(ordered);
6943 : } else {
6944 : /* Screw you mmap */
6945 0 : ret = filemap_write_and_wait_range(inode->i_mapping,
6946 : lockstart,
6947 : lockend);
6948 0 : if (ret)
6949 : break;
6950 :
6951 : /*
6952 : * If we found a page that couldn't be invalidated just
6953 : * fall back to buffered.
6954 : */
6955 0 : ret = invalidate_inode_pages2_range(inode->i_mapping,
6956 0 : lockstart >> PAGE_CACHE_SHIFT,
6957 0 : lockend >> PAGE_CACHE_SHIFT);
6958 0 : if (ret)
6959 : break;
6960 : }
6961 :
6962 0 : cond_resched();
6963 0 : }
6964 :
6965 25254 : return ret;
6966 : }
6967 :
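     :	/*
     :	 * Insert an extent_map for a DIO write. EXTENT_FLAG_PINNED keeps the
     :	 * mapping from being dropped until the ordered extent completes, and
     :	 * EXTENT_FLAG_FILLING marks a write into preallocated space.
     :	 */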
6968 25256 : static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
6969 : u64 len, u64 orig_start,
6970 : u64 block_start, u64 block_len,
6971 : u64 orig_block_len, u64 ram_bytes,
6972 : int type)
6973 : {
6974 : struct extent_map_tree *em_tree;
6975 : struct extent_map *em;
6976 25256 : struct btrfs_root *root = BTRFS_I(inode)->root;
6977 : int ret;
6978 :
6979 25256 : em_tree = &BTRFS_I(inode)->extent_tree;
6980 25256 : em = alloc_extent_map();
6981 25257 : if (!em)
6982 : return ERR_PTR(-ENOMEM);
6983 :
6984 25257 : em->start = start;
6985 25257 : em->orig_start = orig_start;
6986 25257 : em->mod_start = start;
6987 25257 : em->mod_len = len;
6988 25257 : em->len = len;
6989 25257 : em->block_len = block_len;
6990 25257 : em->block_start = block_start;
6991 25257 : em->bdev = root->fs_info->fs_devices->latest_bdev;
6992 25257 : em->orig_block_len = orig_block_len;
6993 25257 : em->ram_bytes = ram_bytes;
6994 25257 : em->generation = -1;
6995 : set_bit(EXTENT_FLAG_PINNED, &em->flags);
6996 25259 : if (type == BTRFS_ORDERED_PREALLOC)
6997 : set_bit(EXTENT_FLAG_FILLING, &em->flags);
6998 :
6999 : do {
7000 25254 : btrfs_drop_extent_cache(inode, em->start,
7001 25254 : em->start + em->len - 1, 0);
7002 25256 : write_lock(&em_tree->lock);
7003 25259 : ret = add_extent_mapping(em_tree, em, 1);
7004 : write_unlock(&em_tree->lock);
7005 25255 : } while (ret == -EEXIST);
7006 :
7007 25255 : if (ret) {
7008 0 : free_extent_map(em);
7009 0 : return ERR_PTR(ret);
7010 : }
7011 :
7012 : return em;
7013 : }
7014 :
7015 :
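     :	/*
     :	 * get_block_t callback for __blockdev_direct_IO: lock the range, map
     :	 * [start, start + len) to an extent (reusing a NOCOW/PREALLOC extent
     :	 * where possible, otherwise allocating a new one) and fill bh_result
     :	 * for the generic DIO code.
     :	 */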
7016 50518 : static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
7017 : struct buffer_head *bh_result, int create)
7018 : {
7019 : struct extent_map *em;
7020 25258 : struct btrfs_root *root = BTRFS_I(inode)->root;
7021 25258 : struct extent_state *cached_state = NULL;
7022 25258 : u64 start = iblock << inode->i_blkbits;
7023 : u64 lockstart, lockend;
7024 25258 : u64 len = bh_result->b_size;
7025 : int unlock_bits = EXTENT_LOCKED;
7026 : int ret = 0;
7027 :
7028 25258 : if (create)
7029 : unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY;
7030 : else
7031 0 : len = min_t(u64, len, root->sectorsize);
7032 :
7033 : lockstart = start;
7034 25258 : lockend = start + len - 1;
7035 :
7036 : /*
7037 : * If this errors out it's because we couldn't invalidate pagecache for
7038 :	 * this range and we need to fall back to buffered IO.
7039 : */
7040 25258 : if (lock_extent_direct(inode, lockstart, lockend, &cached_state, create))
7041 : return -ENOTBLK;
7042 :
7043 25256 : em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
7044 25253 : if (IS_ERR(em)) {
7045 0 : ret = PTR_ERR(em);
7046 0 : goto unlock_err;
7047 : }
7048 :
7049 : /*
7050 :	 * OK, for INLINE and COMPRESSED extents we need to fall back on buffered
7051 :	 * io. INLINE is special, and we could probably kludge it in here, but
7052 :	 * it's still buffered, so for safety let's just fall back to the generic
7053 :	 * buffered path.
7054 : *
7055 : * For COMPRESSED we _have_ to read the entire extent in so we can
7056 : * decompress it, so there will be buffering required no matter what we
7057 : * do, so go ahead and fallback to buffered.
7058 : *
7059 :	 * We return -ENOTBLK because that's what makes DIO fall back
7060 :	 * to buffered IO. Don't blame me, this is the price we pay for using
7061 : * the generic code.
7062 : */
7063 50507 : if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) ||
7064 25254 : em->block_start == EXTENT_MAP_INLINE) {
7065 0 : free_extent_map(em);
7066 : ret = -ENOTBLK;
7067 0 : goto unlock_err;
7068 : }
7069 :
7070 : /* Just a good old fashioned hole, return */
7071 25254 : if (!create && (em->block_start == EXTENT_MAP_HOLE ||
7072 : test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
7073 0 : free_extent_map(em);
7074 0 : goto unlock_err;
7075 : }
7076 :
7077 : /*
7078 : * We don't allocate a new extent in the following cases
7079 : *
7080 : * 1) The inode is marked as NODATACOW. In this case we'll just use the
7081 : * existing extent.
7082 : * 2) The extent is marked as PREALLOC. We're good to go here and can
7083 : * just use the extent.
7084 : *
7085 : */
7086 25254 : if (!create) {
7087 0 : len = min(len, em->len - (start - em->start));
7088 0 : lockstart = start + len;
7089 0 : goto unlock;
7090 : }
7091 :
7092 50507 : if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
7093 25253 : ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
7094 : em->block_start != EXTENT_MAP_HOLE)) {
7095 : int type;
7096 : int ret;
7097 : u64 block_start, orig_start, orig_block_len, ram_bytes;
7098 :
7099 0 : if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
7100 : type = BTRFS_ORDERED_PREALLOC;
7101 : else
7102 : type = BTRFS_ORDERED_NOCOW;
7103 0 : len = min(len, em->len - (start - em->start));
7104 0 : block_start = em->block_start + (start - em->start);
7105 :
7106 0 : if (can_nocow_extent(inode, start, &len, &orig_start,
7107 : &orig_block_len, &ram_bytes) == 1) {
7108 0 : if (type == BTRFS_ORDERED_PREALLOC) {
7109 0 : free_extent_map(em);
7110 0 : em = create_pinned_em(inode, start, len,
7111 : orig_start,
7112 : block_start, len,
7113 : orig_block_len,
7114 : ram_bytes, type);
7115 0 : if (IS_ERR(em))
7116 : goto unlock_err;
7117 : }
7118 :
7119 0 : ret = btrfs_add_ordered_extent_dio(inode, start,
7120 : block_start, len, len, type);
7121 0 : if (ret) {
7122 0 : free_extent_map(em);
7123 0 : goto unlock_err;
7124 : }
7125 0 : goto unlock;
7126 : }
7127 : }
7128 :
7129 : /*
7130 :	 * this will cow the extent; reset the len in case we changed
7131 :	 * it above
7132 : */
7133 25254 : len = bh_result->b_size;
7134 25254 : free_extent_map(em);
7135 25260 : em = btrfs_new_extent_direct(inode, start, len);
7136 25259 : if (IS_ERR(em)) {
7137 0 : ret = PTR_ERR(em);
7138 0 : goto unlock_err;
7139 : }
7140 25259 : len = min(len, em->len - (start - em->start));
7141 : unlock:
7142 50518 : bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
7143 25259 : inode->i_blkbits;
7144 25259 : bh_result->b_size = len;
7145 25259 : bh_result->b_bdev = em->bdev;
7146 : set_buffer_mapped(bh_result);
7147 25259 : if (create) {
7148 25260 : if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
7149 : set_buffer_new(bh_result);
7150 :
7151 : /*
7152 : * Need to update the i_size under the extent lock so buffered
7153 : * readers will get the updated i_size when we unlock.
7154 : */
7155 50520 : if (start + len > i_size_read(inode))
7156 3813 : i_size_write(inode, start + len);
7157 :
7158 : spin_lock(&BTRFS_I(inode)->lock);
7159 25260 : BTRFS_I(inode)->outstanding_extents++;
7160 : spin_unlock(&BTRFS_I(inode)->lock);
7161 :
7162 25259 : ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
7163 25259 : lockstart + len - 1, EXTENT_DELALLOC, NULL,
7164 : &cached_state, GFP_NOFS);
7165 25259 : BUG_ON(ret);
7166 : }
7167 :
7168 : /*
7169 :	 * In the case of a write we need to clear and unlock the entire range;
7170 :	 * in the case of a read we need to unlock only the end area that we
7171 :	 * aren't using, if there is any leftover space.
7172 : */
7173 25258 : if (lockstart < lockend) {
7174 25258 : clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
7175 : lockend, unlock_bits, 1, 0,
7176 : &cached_state, GFP_NOFS);
7177 : } else {
7178 0 : free_extent_state(cached_state);
7179 : }
7180 :
7181 25257 : free_extent_map(em);
7182 :
7183 25260 : return 0;
7184 :
7185 : unlock_err:
7186 0 : clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
7187 : unlock_bits, 1, 0, &cached_state, GFP_NOFS);
7188 0 : return ret;
7189 : }
7190 :
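     :	/*
     :	 * end_io for DIO reads: re-checksum each segment and compare against
     :	 * the csums that were looked up at submit time (dip->csum), then
     :	 * unlock the extent range and complete the original dio_bio.
     :	 */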
7191 0 : static void btrfs_endio_direct_read(struct bio *bio, int err)
7192 : {
7193 0 : struct btrfs_dio_private *dip = bio->bi_private;
7194 : struct bio_vec *bvec;
7195 0 : struct inode *inode = dip->inode;
7196 0 : struct btrfs_root *root = BTRFS_I(inode)->root;
7197 : struct bio *dio_bio;
7198 0 : u32 *csums = (u32 *)dip->csum;
7199 : u64 start;
7200 : int i;
7201 :
7202 0 : start = dip->logical_offset;
7203 0 : bio_for_each_segment_all(bvec, bio, i) {
7204 0 : if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
7205 0 : struct page *page = bvec->bv_page;
7206 : char *kaddr;
7207 0 : u32 csum = ~(u32)0;
7208 : unsigned long flags;
7209 :
7210 0 : local_irq_save(flags);
7211 : kaddr = kmap_atomic(page);
7212 0 : csum = btrfs_csum_data(kaddr + bvec->bv_offset,
7213 0 : csum, bvec->bv_len);
7214 0 : btrfs_csum_final(csum, (char *)&csum);
7215 : kunmap_atomic(kaddr);
7216 0 : local_irq_restore(flags);
7217 :
7218 : flush_dcache_page(bvec->bv_page);
7219 0 : if (csum != csums[i]) {
7220 0 : btrfs_err(root->fs_info, "csum failed ino %llu off %llu csum %u expected csum %u",
7221 : btrfs_ino(inode), start, csum,
7222 : csums[i]);
7223 : err = -EIO;
7224 : }
7225 : }
7226 :
7227 0 : start += bvec->bv_len;
7228 : }
7229 :
7230 0 : unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
7231 0 : dip->logical_offset + dip->bytes - 1);
7232 0 : dio_bio = dip->dio_bio;
7233 :
7234 0 : kfree(dip);
7235 :
7236 : /* If we had a csum failure make sure to clear the uptodate flag */
7237 0 : if (err)
7238 : clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
7239 0 : dio_end_io(dio_bio, err);
7240 0 : bio_put(bio);
7241 0 : }
7242 :
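     :	/*
     :	 * end_io for DIO writes: mark the ordered extent(s) covered by this
     :	 * bio complete and queue finish_ordered_fn; a single bio may span
     :	 * several ordered extents, hence the again/out_test loop below.
     :	 */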
7243 25258 : static void btrfs_endio_direct_write(struct bio *bio, int err)
7244 : {
7245 25258 : struct btrfs_dio_private *dip = bio->bi_private;
7246 25258 : struct inode *inode = dip->inode;
7247 25258 : struct btrfs_root *root = BTRFS_I(inode)->root;
7248 25258 : struct btrfs_ordered_extent *ordered = NULL;
7249 25258 : u64 ordered_offset = dip->logical_offset;
7250 25258 : u64 ordered_bytes = dip->bytes;
7251 : struct bio *dio_bio;
7252 : int ret;
7253 :
7254 25258 : if (err)
7255 : goto out_done;
7256 : again:
7257 25260 : ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
7258 : &ordered_offset,
7259 : ordered_bytes, !err);
7260 25260 : if (!ret)
7261 : goto out_test;
7262 :
7263 25260 : btrfs_init_work(&ordered->work, btrfs_endio_write_helper,
7264 : finish_ordered_fn, NULL, NULL);
7265 25256 : btrfs_queue_work(root->fs_info->endio_write_workers,
7266 25256 : &ordered->work);
7267 : out_test:
7268 : /*
7269 : * our bio might span multiple ordered extents. If we haven't
7270 : * completed the accounting for the whole dio, go back and try again
7271 : */
7272 25260 : if (ordered_offset < dip->logical_offset + dip->bytes) {
7273 0 : ordered_bytes = dip->logical_offset + dip->bytes -
7274 : ordered_offset;
7275 0 : ordered = NULL;
7276 0 : goto again;
7277 : }
7278 : out_done:
7279 25258 : dio_bio = dip->dio_bio;
7280 :
7281 25258 : kfree(dip);
7282 :
7283 : /* If we had an error make sure to clear the uptodate flag */
7284 25260 : if (err)
7285 : clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
7286 25260 : dio_end_io(dio_bio, err);
7287 25260 : bio_put(bio);
7288 25255 : }
7289 :
7290 0 : static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw,
7291 : struct bio *bio, int mirror_num,
7292 : unsigned long bio_flags, u64 offset)
7293 : {
7294 : int ret;
7295 0 : struct btrfs_root *root = BTRFS_I(inode)->root;
7296 0 : ret = btrfs_csum_one_bio(root, inode, bio, offset, 1);
7297 0 : BUG_ON(ret); /* -ENOMEM */
7298 0 : return 0;
7299 : }
7300 :
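     :	/*
     :	 * Completion for one split bio of a DIO: record any error in the dip,
     :	 * and when the last pending bio finishes, fail or complete the
     :	 * original bio.
     :	 */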
7301 0 : static void btrfs_end_dio_bio(struct bio *bio, int err)
7302 : {
7303 0 : struct btrfs_dio_private *dip = bio->bi_private;
7304 :
7305 0 : if (err) {
7306 0 : btrfs_err(BTRFS_I(dip->inode)->root->fs_info,
7307 : "direct IO failed ino %llu rw %lu sector %#Lx len %u err no %d",
7308 : btrfs_ino(dip->inode), bio->bi_rw,
7309 : (unsigned long long)bio->bi_iter.bi_sector,
7310 : bio->bi_iter.bi_size, err);
7311 0 : dip->errors = 1;
7312 :
7313 : /*
7314 :		 * before the atomic variable goes to zero, we must make sure
7315 :		 * dip->errors is perceived to be set.
7316 : */
7317 0 : smp_mb__before_atomic();
7318 : }
7319 :
7320 : /* if there are more bios still pending for this dio, just exit */
7321 0 : if (!atomic_dec_and_test(&dip->pending_bios))
7322 : goto out;
7323 :
7324 0 : if (dip->errors) {
7325 0 : bio_io_error(dip->orig_bio);
7326 : } else {
7327 0 : set_bit(BIO_UPTODATE, &dip->dio_bio->bi_flags);
7328 0 : bio_endio(dip->orig_bio, 0);
7329 : }
7330 : out:
7331 0 : bio_put(bio);
7332 0 : }
7333 :
7334 0 : static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
7335 : u64 first_sector, gfp_t gfp_flags)
7336 : {
7337 0 : int nr_vecs = bio_get_nr_vecs(bdev);
7338 0 : return btrfs_bio_alloc(bdev, first_sector, nr_vecs, gfp_flags);
7339 : }
7340 :
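     :	/*
     :	 * Submit one piece of a DIO. Writes are csummed either inline or via
     :	 * the worker threads (async_submit); reads are hooked up to the endio
     :	 * workqueue and have their expected csums looked up first. Then the
     :	 * bio is mapped and submitted.
     :	 */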
7341 25255 : static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
7342 : int rw, u64 file_offset, int skip_sum,
7343 : int async_submit)
7344 : {
7345 25255 : struct btrfs_dio_private *dip = bio->bi_private;
7346 25255 : int write = rw & REQ_WRITE;
7347 25255 : struct btrfs_root *root = BTRFS_I(inode)->root;
7348 : int ret;
7349 :
7350 25255 : if (async_submit)
7351 0 : async_submit = !atomic_read(&BTRFS_I(inode)->sync_writers);
7352 :
7353 25255 : bio_get(bio);
7354 :
7355 25260 : if (!write) {
7356 0 : ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
7357 0 : if (ret)
7358 : goto err;
7359 : }
7360 :
7361 25255 : if (skip_sum)
7362 : goto map;
7363 :
7364 25255 : if (write && async_submit) {
7365 0 : ret = btrfs_wq_submit_bio(root->fs_info,
7366 : inode, rw, bio, 0, 0,
7367 : file_offset,
7368 : __btrfs_submit_bio_start_direct_io,
7369 : __btrfs_submit_bio_done);
7370 0 : goto err;
7371 25255 : } else if (write) {
7372 : /*
7373 : * If we aren't doing async submit, calculate the csum of the
7374 : * bio now.
7375 : */
7376 25255 : ret = btrfs_csum_one_bio(root, inode, bio, file_offset, 1);
7377 25258 : if (ret)
7378 : goto err;
7379 0 : } else if (!skip_sum) {
7380 0 : ret = btrfs_lookup_bio_sums_dio(root, inode, dip, bio,
7381 : file_offset);
7382 0 : if (ret)
7383 : goto err;
7384 : }
7385 :
7386 : map:
7387 25258 : ret = btrfs_map_bio(root, rw, bio, 0, async_submit);
7388 : err:
7389 25264 : bio_put(bio);
7390 25260 : return ret;
7391 : }
7392 :
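     :	/*
     :	 * Split the original bio where btrfs_map_block says a single bio
     :	 * can't reach any further (e.g. a stripe boundary), giving each piece
     :	 * its own bio counted in dip->pending_bios. In the common case the
     :	 * whole bio fits and orig_bio is submitted directly.
     :	 */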
7393 25258 : static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
7394 : int skip_sum)
7395 : {
7396 25258 : struct inode *inode = dip->inode;
7397 25258 : struct btrfs_root *root = BTRFS_I(inode)->root;
7398 : struct bio *bio;
7399 25258 : struct bio *orig_bio = dip->orig_bio;
7400 25258 : struct bio_vec *bvec = orig_bio->bi_io_vec;
7401 25258 : u64 start_sector = orig_bio->bi_iter.bi_sector;
7402 25258 : u64 file_offset = dip->logical_offset;
7403 : u64 submit_len = 0;
7404 : u64 map_length;
7405 : int nr_pages = 0;
7406 : int ret = 0;
7407 : int async_submit = 0;
7408 :
7409 25258 : map_length = orig_bio->bi_iter.bi_size;
7410 25258 : ret = btrfs_map_block(root->fs_info, rw, start_sector << 9,
7411 : &map_length, NULL, 0);
7412 25259 : if (ret)
7413 : return -EIO;
7414 :
7415 25259 : if (map_length >= orig_bio->bi_iter.bi_size) {
7416 : bio = orig_bio;
7417 : goto submit;
7418 : }
7419 :
7420 : /* async crcs make it difficult to collect full stripe writes. */
7421 0 : if (btrfs_get_alloc_profile(root, 1) &
7422 : (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6))
7423 : async_submit = 0;
7424 : else
7425 : async_submit = 1;
7426 :
7427 0 : bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
7428 0 : if (!bio)
7429 : return -ENOMEM;
7430 :
7431 0 : bio->bi_private = dip;
7432 0 : bio->bi_end_io = btrfs_end_dio_bio;
7433 0 : atomic_inc(&dip->pending_bios);
7434 :
7435 0 : while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) {
7436 0 : if (unlikely(map_length < submit_len + bvec->bv_len ||
7437 : bio_add_page(bio, bvec->bv_page, bvec->bv_len,
7438 : bvec->bv_offset) < bvec->bv_len)) {
7439 : /*
7440 :			 * inc the count before we submit the bio so
7441 :			 * we know the end IO handler can't drop the count
7442 :			 * to zero under us. Otherwise, the dip might get freed
7443 :			 * before we're done setting it up
7444 : */
7445 : atomic_inc(&dip->pending_bios);
7446 0 : ret = __btrfs_submit_dio_bio(bio, inode, rw,
7447 : file_offset, skip_sum,
7448 : async_submit);
7449 0 : if (ret) {
7450 0 : bio_put(bio);
7451 : atomic_dec(&dip->pending_bios);
7452 : goto out_err;
7453 : }
7454 :
7455 0 : start_sector += submit_len >> 9;
7456 0 : file_offset += submit_len;
7457 :
7458 : submit_len = 0;
7459 : nr_pages = 0;
7460 :
7461 0 : bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev,
7462 : start_sector, GFP_NOFS);
7463 0 : if (!bio)
7464 : goto out_err;
7465 0 : bio->bi_private = dip;
7466 0 : bio->bi_end_io = btrfs_end_dio_bio;
7467 :
7468 0 : map_length = orig_bio->bi_iter.bi_size;
7469 0 : ret = btrfs_map_block(root->fs_info, rw,
7470 : start_sector << 9,
7471 : &map_length, NULL, 0);
7472 0 : if (ret) {
7473 0 : bio_put(bio);
7474 0 : goto out_err;
7475 : }
7476 : } else {
7477 0 : submit_len += bvec->bv_len;
7478 : nr_pages++;
7479 0 : bvec++;
7480 : }
7481 : }
7482 :
7483 : submit:
7484 25259 : ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum,
7485 : async_submit);
7486 25260 : if (!ret)
7487 : return 0;
7488 :
7489 0 : bio_put(bio);
7490 : out_err:
7491 0 : dip->errors = 1;
7492 : /*
7493 :	 * before the atomic variable goes to zero, we must
7494 :	 * make sure dip->errors is perceived to be set.
7495 : */
7496 0 : smp_mb__before_atomic();
7497 0 : if (atomic_dec_and_test(&dip->pending_bios))
7498 0 : bio_io_error(dip->orig_bio);
7499 :
7500 :	/* bio_end_io() will handle the error, so we needn't return it */
7501 : return 0;
7502 : }
7503 :
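     :	/*
     :	 * Entry point handed to __blockdev_direct_IO: clone the dio_bio,
     :	 * attach a btrfs_dio_private (with room for read csums), choose the
     :	 * read or write endio and hand off to btrfs_submit_direct_hook.
     :	 */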
7504 25241 : static void btrfs_submit_direct(int rw, struct bio *dio_bio,
7505 : struct inode *inode, loff_t file_offset)
7506 : {
7507 25241 : struct btrfs_root *root = BTRFS_I(inode)->root;
7508 : struct btrfs_dio_private *dip;
7509 : struct bio *io_bio;
7510 : int skip_sum;
7511 : int sum_len;
7512 25241 : int write = rw & REQ_WRITE;
7513 : int ret = 0;
7514 : u16 csum_size;
7515 :
7516 25241 : skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
7517 :
7518 25241 : io_bio = btrfs_bio_clone(dio_bio, GFP_NOFS);
7519 25257 : if (!io_bio) {
7520 : ret = -ENOMEM;
7521 : goto free_ordered;
7522 : }
7523 :
7524 25253 : if (!skip_sum && !write) {
7525 0 : csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7526 0 : sum_len = dio_bio->bi_iter.bi_size >>
7527 0 : inode->i_sb->s_blocksize_bits;
7528 0 : sum_len *= csum_size;
7529 : } else {
7530 : sum_len = 0;
7531 : }
7532 :
7533 25253 : dip = kmalloc(sizeof(*dip) + sum_len, GFP_NOFS);
7534 25258 : if (!dip) {
7535 : ret = -ENOMEM;
7536 : goto free_io_bio;
7537 : }
7538 :
7539 25258 : dip->private = dio_bio->bi_private;
7540 25258 : dip->inode = inode;
7541 25258 : dip->logical_offset = file_offset;
7542 25258 : dip->bytes = dio_bio->bi_iter.bi_size;
7543 25258 : dip->disk_bytenr = (u64)dio_bio->bi_iter.bi_sector << 9;
7544 25258 : io_bio->bi_private = dip;
7545 25258 : dip->errors = 0;
7546 25258 : dip->orig_bio = io_bio;
7547 25258 : dip->dio_bio = dio_bio;
7548 : atomic_set(&dip->pending_bios, 0);
7549 :
7550 25258 : if (write)
7551 25258 : io_bio->bi_end_io = btrfs_endio_direct_write;
7552 : else
7553 0 : io_bio->bi_end_io = btrfs_endio_direct_read;
7554 :
7555 25258 : ret = btrfs_submit_direct_hook(rw, dip, skip_sum);
7556 25260 : if (!ret)
7557 25260 : return;
7558 :
7559 : free_io_bio:
7560 0 : bio_put(io_bio);
7561 :
7562 : free_ordered:
7563 : /*
7564 : * If this is a write, we need to clean up the reserved space and kill
7565 : * the ordered extent.
7566 : */
7567 0 : if (write) {
7568 : struct btrfs_ordered_extent *ordered;
7569 0 : ordered = btrfs_lookup_ordered_extent(inode, file_offset);
7570 0 : if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) &&
7571 : !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags))
7572 0 : btrfs_free_reserved_extent(root, ordered->start,
7573 : ordered->disk_len, 1);
7574 0 : btrfs_put_ordered_extent(ordered);
7575 0 : btrfs_put_ordered_extent(ordered);
7576 : }
7577 0 : bio_endio(dio_bio, ret);
7578 : }
7579 :
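     :	/*
     :	 * Validate DIO alignment against the sector size. Reads additionally
     :	 * reject iovecs that repeat an iov_base; see the comment below about
     :	 * csum errors on read-back.
     :	 */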
7580 25263 : static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb,
7581 : const struct iov_iter *iter, loff_t offset)
7582 : {
7583 : int seg;
7584 : int i;
7585 25263 : unsigned blocksize_mask = root->sectorsize - 1;
7586 : ssize_t retval = -EINVAL;
7587 :
7588 25263 : if (offset & blocksize_mask)
7589 : goto out;
7590 :
7591 25262 : if (iov_iter_alignment(iter) & blocksize_mask)
7592 : goto out;
7593 :
7594 :	/* If this is a write we don't need to check anything else */
7595 25265 : if (rw & WRITE)
7596 : return 0;
7597 : /*
7598 : * Check to make sure we don't have duplicate iov_base's in this
7599 :	 * iovec; if so return -EINVAL, otherwise we'll get csum errors
7600 : * when reading back.
7601 : */
7602 0 : for (seg = 0; seg < iter->nr_segs; seg++) {
7603 0 : for (i = seg + 1; i < iter->nr_segs; i++) {
7604 0 : if (iter->iov[seg].iov_base == iter->iov[i].iov_base)
7605 : goto out;
7606 : }
7607 : }
7608 : retval = 0;
7609 : out:
7610 : return retval;
7611 : }
7612 :
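     :	/*
     :	 * ->direct_IO: reserve delalloc space for writes (dropping i_mutex
     :	 * while the write stays inside i_size), run the generic blockdev DIO
     :	 * with our get_blocks/submit hooks, then release whatever reservation
     :	 * went unused.
     :	 */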
7613 25262 : static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
7614 25270 : struct iov_iter *iter, loff_t offset)
7615 : {
7616 25262 : struct file *file = iocb->ki_filp;
7617 25262 : struct inode *inode = file->f_mapping->host;
7618 : size_t count = 0;
7619 : int flags = 0;
7620 : bool wakeup = true;
7621 : bool relock = false;
7622 : ssize_t ret;
7623 :
7624 25262 : if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iter, offset))
7625 : return 0;
7626 :
7627 25265 : atomic_inc(&inode->i_dio_count);
7628 25268 : smp_mb__after_atomic();
7629 :
7630 : /*
7631 : * The generic stuff only does filemap_write_and_wait_range, which
7632 : * isn't enough if we've written compressed pages to this area, so
7633 : * we need to flush the dirty pages again to make absolutely sure
7634 : * that any outstanding dirty pages are on disk.
7635 : */
7636 : count = iov_iter_count(iter);
7637 25270 : if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
7638 : &BTRFS_I(inode)->runtime_flags))
7639 0 : filemap_fdatawrite_range(inode->i_mapping, offset,
7640 0 : offset + count - 1);
7641 :
7642 25270 : if (rw & WRITE) {
7643 : /*
7644 :		 * If the write DIO is beyond the EOF, we need to update
7645 :		 * the isize, but it is protected by i_mutex. So we can
7646 :		 * not unlock the i_mutex in this case.
7647 : */
7648 25270 : if (offset + count <= inode->i_size) {
7649 21447 : mutex_unlock(&inode->i_mutex);
7650 : relock = true;
7651 : }
7652 25270 : ret = btrfs_delalloc_reserve_space(inode, count);
7653 25270 : if (ret)
7654 : goto out;
7655 0 : } else if (unlikely(test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
7656 : &BTRFS_I(inode)->runtime_flags))) {
7657 0 : inode_dio_done(inode);
7658 : flags = DIO_LOCKING | DIO_SKIP_HOLES;
7659 : wakeup = false;
7660 : }
7661 :
7662 25261 : ret = __blockdev_direct_IO(rw, iocb, inode,
7663 25261 : BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
7664 : iter, offset, btrfs_get_blocks_direct, NULL,
7665 : btrfs_submit_direct, flags);
7666 25258 : if (rw & WRITE) {
7667 25257 : if (ret < 0 && ret != -EIOCBQUEUED)
7668 0 : btrfs_delalloc_release_space(inode, count);
7669 25257 : else if (ret >= 0 && (size_t)ret < count)
7670 0 : btrfs_delalloc_release_space(inode,
7671 0 : count - (size_t)ret);
7672 : else
7673 25257 : btrfs_delalloc_release_metadata(inode, 0);
7674 : }
7675 : out:
7676 25269 : if (wakeup)
7677 25269 : inode_dio_done(inode);
7678 25269 : if (relock)
7679 21447 : mutex_lock(&inode->i_mutex);
7680 :
7681 25269 : return ret;
7682 : }
7683 :
7684 : #define BTRFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC)
7685 :
7686 851 : static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
7687 : __u64 start, __u64 len)
7688 : {
7689 : int ret;
7690 :
7691 851 : ret = fiemap_check_flags(fieinfo, BTRFS_FIEMAP_FLAGS);
7692 851 : if (ret)
7693 : return ret;
7694 :
7695 344 : return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent_fiemap);
7696 : }
7697 :
7698 26884 : int btrfs_readpage(struct file *file, struct page *page)
7699 : {
7700 : struct extent_io_tree *tree;
7701 27771 : tree = &BTRFS_I(page->mapping->host)->io_tree;
7702 27771 : return extent_read_full_page(tree, page, btrfs_get_extent, 0);
7703 : }
7704 :
7705 0 : static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
7706 : {
7707 : struct extent_io_tree *tree;
7708 :
7709 :
7710 0 : if (current->flags & PF_MEMALLOC) {
7711 0 : redirty_page_for_writepage(wbc, page);
7712 0 : unlock_page(page);
7713 0 : return 0;
7714 : }
7715 0 : tree = &BTRFS_I(page->mapping->host)->io_tree;
7716 0 : return extent_write_full_page(tree, page, btrfs_get_extent, wbc);
7717 : }
7718 :
7719 38223 : static int btrfs_writepages(struct address_space *mapping,
7720 : struct writeback_control *wbc)
7721 : {
7722 : struct extent_io_tree *tree;
7723 :
7724 38223 : tree = &BTRFS_I(mapping->host)->io_tree;
7725 38223 : return extent_writepages(tree, mapping, btrfs_get_extent, wbc);
7726 : }
7727 :
7728 : static int
7729 24097 : btrfs_readpages(struct file *file, struct address_space *mapping,
7730 : struct list_head *pages, unsigned nr_pages)
7731 : {
7732 : struct extent_io_tree *tree;
7733 24097 : tree = &BTRFS_I(mapping->host)->io_tree;
7734 24097 : return extent_readpages(tree, mapping, pages, nr_pages,
7735 : btrfs_get_extent);
7736 : }
7737 188223 : static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags)
7738 : {
7739 : struct extent_io_tree *tree;
7740 : struct extent_map_tree *map;
7741 : int ret;
7742 :
7743 188223 : tree = &BTRFS_I(page->mapping->host)->io_tree;
7744 188223 : map = &BTRFS_I(page->mapping->host)->extent_tree;
7745 188223 : ret = try_release_extent_mapping(map, tree, page, gfp_flags);
7746 188224 : if (ret == 1) {
7747 : ClearPagePrivate(page);
7748 188224 : set_page_private(page, 0);
7749 188224 : page_cache_release(page);
7750 : }
7751 188224 : return ret;
7752 : }
7753 :
7754 1568 : static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
7755 : {
7756 3136 : if (PageWriteback(page) || PageDirty(page))
7757 : return 0;
7758 1527 : return __btrfs_releasepage(page, gfp_flags & GFP_NOFS);
7759 : }
7760 :
7761 3512376 : static void btrfs_invalidatepage(struct page *page, unsigned int offset,
7762 : unsigned int length)
7763 : {
7764 1756188 : struct inode *inode = page->mapping->host;
7765 : struct extent_io_tree *tree;
7766 : struct btrfs_ordered_extent *ordered;
7767 1756188 : struct extent_state *cached_state = NULL;
7768 1756188 : u64 page_start = page_offset(page);
7769 1756188 : u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
7770 1756188 : int inode_evicting = inode->i_state & I_FREEING;
7771 :
7772 : /*
7773 : * we have the page locked, so new writeback can't start,
7774 : * and the dirty bit won't be cleared while we are here.
7775 : *
7776 : * Wait for IO on this page so that we can safely clear
7777 : * the PagePrivate2 bit and do ordered accounting
7778 : */
7779 1756188 : wait_on_page_writeback(page);
7780 :
7781 1756188 : tree = &BTRFS_I(inode)->io_tree;
7782 1756188 : if (offset) {
7783 292 : btrfs_releasepage(page, GFP_NOFS);
7784 1756482 : return;
7785 : }
7786 :
7787 1755896 : if (!inode_evicting)
7788 186696 : lock_extent_bits(tree, page_start, page_end, 0, &cached_state);
7789 1755896 : ordered = btrfs_lookup_ordered_extent(inode, page_start);
7790 1755897 : if (ordered) {
7791 : /*
7792 : * IO on this page will never be started, so we need
7793 : * to account for any ordered extents now
7794 : */
7795 0 : if (!inode_evicting)
7796 0 : clear_extent_bit(tree, page_start, page_end,
7797 : EXTENT_DIRTY | EXTENT_DELALLOC |
7798 : EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
7799 : EXTENT_DEFRAG, 1, 0, &cached_state,
7800 : GFP_NOFS);
7801 : /*
7802 : * whoever cleared the private bit is responsible
7803 : * for the finish_ordered_io
7804 : */
7805 0 : if (TestClearPagePrivate2(page)) {
7806 : struct btrfs_ordered_inode_tree *tree;
7807 : u64 new_len;
7808 :
7809 : tree = &BTRFS_I(inode)->ordered_tree;
7810 :
7811 : spin_lock_irq(&tree->lock);
7812 0 : set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags);
7813 0 : new_len = page_start - ordered->file_offset;
7814 0 : if (new_len < ordered->truncated_len)
7815 0 : ordered->truncated_len = new_len;
7816 : spin_unlock_irq(&tree->lock);
7817 :
7818 0 : if (btrfs_dec_test_ordered_pending(inode, &ordered,
7819 : page_start,
7820 : PAGE_CACHE_SIZE, 1))
7821 0 : btrfs_finish_ordered_io(ordered);
7822 : }
7823 0 : btrfs_put_ordered_extent(ordered);
7824 0 : if (!inode_evicting) {
7825 0 : cached_state = NULL;
7826 0 : lock_extent_bits(tree, page_start, page_end, 0,
7827 : &cached_state);
7828 : }
7829 : }
7830 :
7831 1755898 : if (!inode_evicting) {
7832 186697 : clear_extent_bit(tree, page_start, page_end,
7833 : EXTENT_LOCKED | EXTENT_DIRTY |
7834 : EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
7835 : EXTENT_DEFRAG, 1, 1,
7836 : &cached_state, GFP_NOFS);
7837 :
7838 186696 : __btrfs_releasepage(page, GFP_NOFS);
7839 : }
7840 :
7841 : ClearPageChecked(page);
7842 1755898 : if (PagePrivate(page)) {
7843 : ClearPagePrivate(page);
7844 1569201 : set_page_private(page, 0);
7845 1569201 : page_cache_release(page);
7846 : }
7847 : }
7848 :
7849 : /*
7850 : * btrfs_page_mkwrite() is not allowed to change the file size as it gets
7851 : * called from a page fault handler when a page is first dirtied. Hence we must
7852 : * be careful to check for EOF conditions here. We set the page up correctly
7853 : * for a written page which means we get ENOSPC checking when writing into
7854 : * holes and correct delalloc and unwritten extent mapping on filesystems that
7855 : * support these features.
7856 : *
7857 : * We are not allowed to take the i_mutex here so we have to play games to
7858 : * protect against truncate races as the page could now be beyond EOF. Because
7859 : * vmtruncate() writes the inode size before removing pages, once we have the
7860 : * page lock we can determine safely if the page is beyond EOF. If it is not
7861 : * beyond EOF, then the page is guaranteed safe against truncation until we
7862 : * unlock the page.
7863 : */
7864 6081 : int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
7865 : {
7866 12166 : struct page *page = vmf->page;
7867 12166 : struct inode *inode = file_inode(vma->vm_file);
7868 6081 : struct btrfs_root *root = BTRFS_I(inode)->root;
7869 6081 : struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
7870 : struct btrfs_ordered_extent *ordered;
7871 6081 : struct extent_state *cached_state = NULL;
7872 : char *kaddr;
7873 : unsigned long zero_start;
7874 : loff_t size;
7875 : int ret;
7876 : int reserved = 0;
7877 : u64 page_start;
7878 : u64 page_end;
7879 :
7880 6081 : sb_start_pagefault(inode->i_sb);
7881 6081 : ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
7882 6081 : if (!ret) {
7883 6081 : ret = file_update_time(vma->vm_file);
7884 : reserved = 1;
7885 : }
7886 6081 : if (ret) {
7887 0 : if (ret == -ENOMEM)
7888 : ret = VM_FAULT_OOM;
7889 : else /* -ENOSPC, -EIO, etc */
7890 : ret = VM_FAULT_SIGBUS;
7891 0 : if (reserved)
7892 : goto out;
7893 : goto out_noreserve;
7894 : }
7895 :
7896 : ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */
7897 : again:
7898 6085 : lock_page(page);
7899 : size = i_size_read(inode);
7900 6085 : page_start = page_offset(page);
7901 6085 : page_end = page_start + PAGE_CACHE_SIZE - 1;
7902 :
7903 12170 : if ((page->mapping != inode->i_mapping) ||
7904 6085 : (page_start >= size)) {
7905 : /* page got truncated out from underneath us */
7906 : goto out_unlock;
7907 : }
7908 6085 : wait_on_page_writeback(page);
7909 :
7910 6085 : lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state);
7911 6085 : set_page_extent_mapped(page);
7912 :
7913 : /*
7914 : * we can't set the delalloc bits if there are pending ordered
7915 : * extents. Drop our locks and wait for them to finish
7916 : */
7917 6085 : ordered = btrfs_lookup_ordered_extent(inode, page_start);
7918 6085 : if (ordered) {
7919 4 : unlock_extent_cached(io_tree, page_start, page_end,
7920 : &cached_state, GFP_NOFS);
7921 4 : unlock_page(page);
7922 4 : btrfs_start_ordered_extent(inode, ordered, 1);
7923 4 : btrfs_put_ordered_extent(ordered);
7924 4 : goto again;
7925 : }
7926 :
7927 : /*
7928 : * XXX - page_mkwrite gets called every time the page is dirtied, even
7929 : * if it was already dirty, so for space accounting reasons we need to
7930 :	 * clear any delalloc bits for the range we are about to save. There
7931 : * is probably a better way to do this, but for now keep consistent with
7932 : * prepare_pages in the normal write path.
7933 : */
7934 6081 : clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
7935 : EXTENT_DIRTY | EXTENT_DELALLOC |
7936 : EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
7937 : 0, 0, &cached_state, GFP_NOFS);
7938 :
7939 6081 : ret = btrfs_set_extent_delalloc(inode, page_start, page_end,
7940 : &cached_state);
7941 6081 : if (ret) {
7942 0 : unlock_extent_cached(io_tree, page_start, page_end,
7943 : &cached_state, GFP_NOFS);
7944 : ret = VM_FAULT_SIGBUS;
7945 0 : goto out_unlock;
7946 : }
7947 : ret = 0;
7948 :
7949 : /* page is wholly or partially inside EOF */
7950 6081 : if (page_start + PAGE_CACHE_SIZE > size)
7951 0 : zero_start = size & ~PAGE_CACHE_MASK;
7952 : else
7953 : zero_start = PAGE_CACHE_SIZE;
7954 :
7955 6081 : if (zero_start != PAGE_CACHE_SIZE) {
7956 : kaddr = kmap(page);
7957 0 : memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start);
7958 : flush_dcache_page(page);
7959 : kunmap(page);
7960 : }
7961 : ClearPageChecked(page);
7962 6081 : set_page_dirty(page);
7963 : SetPageUptodate(page);
7964 :
7965 6081 : BTRFS_I(inode)->last_trans = root->fs_info->generation;
7966 6081 : BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
7967 6081 : BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit;
7968 :
7969 6081 : unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS);
7970 :
7971 : out_unlock:
7972 6081 : if (!ret) {
7973 6081 : sb_end_pagefault(inode->i_sb);
7974 6081 : return VM_FAULT_LOCKED;
7975 : }
7976 0 : unlock_page(page);
7977 : out:
7978 0 : btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
7979 : out_noreserve:
7980 0 : sb_end_pagefault(inode->i_sb);
7981 0 : return ret;
7982 : }
7983 :
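     :	/*
     :	 * Drop the file extents beyond the new (already updated) i_size,
     :	 * restarting the transaction as needed; the reservation scheme is
     :	 * described in the long comment below.
     :	 */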
7984 2723 : static int btrfs_truncate(struct inode *inode)
7985 : {
7986 5446 : struct btrfs_root *root = BTRFS_I(inode)->root;
7987 : struct btrfs_block_rsv *rsv;
7988 : int ret = 0;
7989 : int err = 0;
7990 : struct btrfs_trans_handle *trans;
7991 2723 : u64 mask = root->sectorsize - 1;
7992 : u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
7993 :
7994 2723 : ret = btrfs_wait_ordered_range(inode, inode->i_size & (~mask),
7995 : (u64)-1);
7996 2723 : if (ret)
7997 : return ret;
7998 :
7999 : /*
8000 :	 * Yes, ladies and gentlemen, this is indeed ugly. The fact is we have
8001 : * 3 things going on here
8002 : *
8003 : * 1) We need to reserve space for our orphan item and the space to
8004 : * delete our orphan item. Lord knows we don't want to have a dangling
8005 : * orphan item because we didn't reserve space to remove it.
8006 : *
8007 : * 2) We need to reserve space to update our inode.
8008 : *
8009 : * 3) We need to have something to cache all the space that is going to
8010 :	 * be freed up by the truncate operation, but also have some slack
8011 : * space reserved in case it uses space during the truncate (thank you
8012 : * very much snapshotting).
8013 : *
8014 :	 * And we need these to all be separate. The fact is we can use a lot of
8015 :	 * space doing the truncate, and we have no earthly idea how much space
8016 :	 * we will use, so we need the truncate reservation to be separate so it
8017 :	 * doesn't end up using space reserved for updating the inode or
8018 :	 * removing the orphan item. We also need to be able to stop the
8019 :	 * transaction and start a new one, which means we need to be able to
8020 :	 * update the inode several times, and we have no way of knowing how
8021 :	 * many times that will be, so we can't just reserve 1 item for the
8022 :	 * entirety of the operation, so that has to be done separately as well.
8023 : * Then there is the orphan item, which does indeed need to be held on
8024 : * to for the whole operation, and we need nobody to touch this reserved
8025 : * space except the orphan code.
8026 : *
8027 : * So that leaves us with
8028 : *
8029 : * 1) root->orphan_block_rsv - for the orphan deletion.
8030 : * 2) rsv - for the truncate reservation, which we will steal from the
8031 : * transaction reservation.
8032 :	 * 3) fs_info->trans_block_rsv - this will have 1 item's worth left for
8033 : * updating the inode.
8034 : */
8035 2723 : rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP);
8036 2723 : if (!rsv)
8037 : return -ENOMEM;
8038 2723 : rsv->size = min_size;
8039 2723 : rsv->failfast = 1;
8040 :
8041 : /*
8042 : * 1 for the truncate slack space
8043 : * 1 for updating the inode.
8044 : */
8045 2723 : trans = btrfs_start_transaction(root, 2);
8046 2723 : if (IS_ERR(trans)) {
8047 0 : err = PTR_ERR(trans);
8048 0 : goto out;
8049 : }
8050 :
8051 : /* Migrate the slack space for the truncate to our reserve */
8052 2723 : ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv,
8053 : min_size);
8054 2723 : BUG_ON(ret);
8055 :
8056 : /*
8057 :	 * So if we truncate and then write and fsync, we would normally just
8058 : * write the extents that changed, which is a problem if we need to
8059 : * first truncate that entire inode. So set this flag so we write out
8060 : * all of the extents in the inode to the sync log so we're completely
8061 : * safe.
8062 : */
8063 : set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
8064 2723 : trans->block_rsv = rsv;
8065 :
8066 : while (1) {
8067 2723 : ret = btrfs_truncate_inode_items(trans, root, inode,
8068 2723 : inode->i_size,
8069 : BTRFS_EXTENT_DATA_KEY);
8070 2723 : if (ret != -ENOSPC) {
8071 : err = ret;
8072 : break;
8073 : }
8074 :
8075 0 : trans->block_rsv = &root->fs_info->trans_block_rsv;
8076 0 : ret = btrfs_update_inode(trans, root, inode);
8077 0 : if (ret) {
8078 : err = ret;
8079 : break;
8080 : }
8081 :
8082 0 : btrfs_end_transaction(trans, root);
8083 0 : btrfs_btree_balance_dirty(root);
8084 :
8085 0 : trans = btrfs_start_transaction(root, 2);
8086 0 : if (IS_ERR(trans)) {
8087 0 : ret = err = PTR_ERR(trans);
8088 : trans = NULL;
8089 0 : break;
8090 : }
8091 :
8092 0 : ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv,
8093 : rsv, min_size);
8094 0 : BUG_ON(ret); /* shouldn't happen */
8095 0 : trans->block_rsv = rsv;
8096 0 : }
8097 :
8098 2723 : if (ret == 0 && inode->i_nlink > 0) {
8099 2723 : trans->block_rsv = root->orphan_block_rsv;
8100 2723 : ret = btrfs_orphan_del(trans, inode);
8101 2723 : if (ret)
8102 : err = ret;
8103 : }
8104 :
8105 2723 : if (trans) {
8106 2723 : trans->block_rsv = &root->fs_info->trans_block_rsv;
8107 2723 : ret = btrfs_update_inode(trans, root, inode);
8108 2723 : if (ret && !err)
8109 : err = ret;
8110 :
8111 2723 : ret = btrfs_end_transaction(trans, root);
8112 2723 : btrfs_btree_balance_dirty(root);
8113 : }
8114 :
8115 : out:
8116 2723 : btrfs_free_block_rsv(root, rsv);
8117 :
8118 2723 : if (ret && !err)
8119 : err = ret;
8120 :
8121 2723 : return err;
8122 : }
8123 :
8124 : /*
8125 : * create a new subvolume directory/inode (helper for the ioctl).
8126 : */
8127 49 : int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
8128 : struct btrfs_root *new_root,
8129 : struct btrfs_root *parent_root,
8130 : u64 new_dirid)
8131 : {
8132 : struct inode *inode;
8133 : int err;
8134 49 : u64 index = 0;
8135 :
8136 49 : inode = btrfs_new_inode(trans, new_root, NULL, "..", 2,
8137 : new_dirid, new_dirid,
8138 49 : S_IFDIR | (~current_umask() & S_IRWXUGO),
8139 : &index);
8140 49 : if (IS_ERR(inode))
8141 0 : return PTR_ERR(inode);
8142 49 : inode->i_op = &btrfs_dir_inode_operations;
8143 49 : inode->i_fop = &btrfs_dir_file_operations;
8144 :
8145 49 : set_nlink(inode, 1);
8146 : btrfs_i_size_write(inode, 0);
8147 49 : unlock_new_inode(inode);
8148 :
8149 49 : err = btrfs_subvol_inherit_props(trans, new_root, parent_root);
8150 49 : if (err)
8151 0 : btrfs_err(new_root->fs_info,
8152 : "error inheriting subvolume %llu properties: %d",
8153 : new_root->root_key.objectid, err);
8154 :
8155 49 : err = btrfs_update_inode(trans, new_root, inode);
8156 :
8157 49 : iput(inode);
8158 49 : return err;
8159 : }
8160 :
8161 25711 : struct inode *btrfs_alloc_inode(struct super_block *sb)
8162 : {
8163 : struct btrfs_inode *ei;
8164 : struct inode *inode;
8165 :
8166 25711 : ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
8167 25711 : if (!ei)
8168 : return NULL;
8169 :
8170 25709 : ei->root = NULL;
8171 25709 : ei->generation = 0;
8172 25709 : ei->last_trans = 0;
8173 25709 : ei->last_sub_trans = 0;
8174 25709 : ei->logged_trans = 0;
8175 25709 : ei->delalloc_bytes = 0;
8176 25709 : ei->disk_i_size = 0;
8177 25709 : ei->flags = 0;
8178 25709 : ei->csum_bytes = 0;
8179 25709 : ei->index_cnt = (u64)-1;
8180 25709 : ei->dir_index = 0;
8181 25709 : ei->last_unlink_trans = 0;
8182 25709 : ei->last_log_commit = 0;
8183 :
8184 25709 : spin_lock_init(&ei->lock);
8185 25709 : ei->outstanding_extents = 0;
8186 25709 : ei->reserved_extents = 0;
8187 :
8188 25709 : ei->runtime_flags = 0;
8189 25709 : ei->force_compress = BTRFS_COMPRESS_NONE;
8190 :
8191 25709 : ei->delayed_node = NULL;
8192 :
8193 25709 : inode = &ei->vfs_inode;
8194 25709 : extent_map_tree_init(&ei->extent_tree);
8195 25707 : extent_io_tree_init(&ei->io_tree, &inode->i_data);
8196 25707 : extent_io_tree_init(&ei->io_failure_tree, &inode->i_data);
8197 25706 : ei->io_tree.track_uptodate = 1;
8198 25706 : ei->io_failure_tree.track_uptodate = 1;
8199 : atomic_set(&ei->sync_writers, 0);
8200 25706 : mutex_init(&ei->log_mutex);
8201 25707 : mutex_init(&ei->delalloc_mutex);
8202 : btrfs_ordered_inode_tree_init(&ei->ordered_tree);
8203 25707 : INIT_LIST_HEAD(&ei->delalloc_inodes);
8204 25707 : RB_CLEAR_NODE(&ei->rb_node);
8205 :
8206 25707 : return inode;
8207 : }
8208 :
8209 : #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
8210 : void btrfs_test_destroy_inode(struct inode *inode)
8211 : {
8212 : btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
8213 : kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
8214 : }
8215 : #endif
8216 :
8217 25704 : static void btrfs_i_callback(struct rcu_head *head)
8218 : {
8219 : struct inode *inode = container_of(head, struct inode, i_rcu);
8220 25704 : kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
8221 25704 : }
8222 :
8223 25704 : void btrfs_destroy_inode(struct inode *inode)
8224 : {
8225 : struct btrfs_ordered_extent *ordered;
8226 25704 : struct btrfs_root *root = BTRFS_I(inode)->root;
8227 :
8228 25704 : WARN_ON(!hlist_empty(&inode->i_dentry));
8229 25704 : WARN_ON(inode->i_data.nrpages);
8230 25704 : WARN_ON(BTRFS_I(inode)->outstanding_extents);
8231 25704 : WARN_ON(BTRFS_I(inode)->reserved_extents);
8232 25704 : WARN_ON(BTRFS_I(inode)->delalloc_bytes);
8233 25704 : WARN_ON(BTRFS_I(inode)->csum_bytes);
8234 :
8235 : /*
8236 :	 * This can happen when we create an inode, but somebody else also
8237 : * created the same inode and we need to destroy the one we already
8238 : * created.
8239 : */
8240 25704 : if (!root)
8241 : goto free;
8242 :
8243 25704 : if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
8244 : &BTRFS_I(inode)->runtime_flags)) {
8245 0 : btrfs_info(root->fs_info, "inode %llu still on the orphan list",
8246 : btrfs_ino(inode));
8247 0 : atomic_dec(&root->orphan_inodes);
8248 : }
8249 :
8250 : while (1) {
8251 25704 : ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1);
8252 25704 : if (!ordered)
8253 : break;
8254 : else {
8255 0 : btrfs_err(root->fs_info, "found ordered extent %llu %llu on inode cleanup",
8256 : ordered->file_offset, ordered->len);
8257 0 : btrfs_remove_ordered_extent(inode, ordered);
8258 0 : btrfs_put_ordered_extent(ordered);
8259 0 : btrfs_put_ordered_extent(ordered);
8260 : }
8261 0 : }
8262 25704 : inode_tree_del(inode);
8263 25704 : btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
8264 : free:
8265 25704 : call_rcu(&inode->i_rcu, btrfs_i_callback);
8266 25704 : }
8267 :
8268 85705 : int btrfs_drop_inode(struct inode *inode)
8269 : {
8270 85705 : struct btrfs_root *root = BTRFS_I(inode)->root;
8271 :
8272 85705 : if (root == NULL)
8273 : return 1;
8274 :
8275 : /* the snap/subvol tree is on deleting */
8276 85704 : if (btrfs_root_refs(&root->root_item) == 0)
8277 : return 1;
8278 : else
8279 85649 : return generic_drop_inode(inode);
8280 : }
8281 :
8282 15021 : static void init_once(void *foo)
8283 : {
8284 : struct btrfs_inode *ei = (struct btrfs_inode *) foo;
8285 :
8286 15021 : inode_init_once(&ei->vfs_inode);
8287 15021 : }
8288 :
8289 0 : void btrfs_destroy_cachep(void)
8290 : {
8291 : /*
8292 :	 * Make sure all delayed RCU-freed inodes are flushed before we
8293 :	 * destroy the cache.
8294 : */
8295 0 : rcu_barrier();
8296 0 : if (btrfs_inode_cachep)
8297 0 : kmem_cache_destroy(btrfs_inode_cachep);
8298 0 : if (btrfs_trans_handle_cachep)
8299 0 : kmem_cache_destroy(btrfs_trans_handle_cachep);
8300 0 : if (btrfs_transaction_cachep)
8301 0 : kmem_cache_destroy(btrfs_transaction_cachep);
8302 0 : if (btrfs_path_cachep)
8303 0 : kmem_cache_destroy(btrfs_path_cachep);
8304 0 : if (btrfs_free_space_cachep)
8305 0 : kmem_cache_destroy(btrfs_free_space_cachep);
8306 0 : if (btrfs_delalloc_work_cachep)
8307 0 : kmem_cache_destroy(btrfs_delalloc_work_cachep);
8308 0 : }
8309 :
8310 0 : int btrfs_init_cachep(void)
8311 : {
8312 0 : btrfs_inode_cachep = kmem_cache_create("btrfs_inode",
8313 : sizeof(struct btrfs_inode), 0,
8314 : SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, init_once);
8315 0 : if (!btrfs_inode_cachep)
8316 : goto fail;
8317 :
8318 0 : btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle",
8319 : sizeof(struct btrfs_trans_handle), 0,
8320 : SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
8321 0 : if (!btrfs_trans_handle_cachep)
8322 : goto fail;
8323 :
8324 0 : btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction",
8325 : sizeof(struct btrfs_transaction), 0,
8326 : SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
8327 0 : if (!btrfs_transaction_cachep)
8328 : goto fail;
8329 :
8330 0 : btrfs_path_cachep = kmem_cache_create("btrfs_path",
8331 : sizeof(struct btrfs_path), 0,
8332 : SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
8333 0 : if (!btrfs_path_cachep)
8334 : goto fail;
8335 :
8336 0 : btrfs_free_space_cachep = kmem_cache_create("btrfs_free_space",
8337 : sizeof(struct btrfs_free_space), 0,
8338 : SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
8339 0 : if (!btrfs_free_space_cachep)
8340 : goto fail;
8341 :
8342 0 : btrfs_delalloc_work_cachep = kmem_cache_create("btrfs_delalloc_work",
8343 : sizeof(struct btrfs_delalloc_work), 0,
8344 : SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
8345 : NULL);
8346 0 : if (!btrfs_delalloc_work_cachep)
8347 : goto fail;
8348 :
8349 : return 0;
8350 : fail:
8351 0 : btrfs_destroy_cachep();
8352 0 : return -ENOMEM;
8353 : }
8354 :
8355 853993 : static int btrfs_getattr(struct vfsmount *mnt,
8356 : struct dentry *dentry, struct kstat *stat)
8357 : {
8358 : u64 delalloc_bytes;
8359 853993 : struct inode *inode = dentry->d_inode;
8360 853993 : u32 blocksize = inode->i_sb->s_blocksize;
8361 :
8362 853993 : generic_fillattr(inode, stat);
8363 854030 : stat->dev = BTRFS_I(inode)->root->anon_dev;
8364 854030 : stat->blksize = PAGE_CACHE_SIZE;
8365 :
8366 : spin_lock(&BTRFS_I(inode)->lock);
8367 854129 : delalloc_bytes = BTRFS_I(inode)->delalloc_bytes;
8368 : spin_unlock(&BTRFS_I(inode)->lock);
8369 2562401 : stat->blocks = (ALIGN(inode_get_bytes(inode), blocksize) +
8370 1708322 : ALIGN(delalloc_bytes, blocksize)) >> 9;
8371 854161 : return 0;
8372 : }
8373 :
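     :	/*
     :	 * Rename: only subvolume links may move between roots (anything else
     :	 * across subvolumes returns -EXDEV). Remove the old directory entry,
     :	 * add the new one, and pin the log so a log commit can't land between
     :	 * the unlink and the re-link.
     :	 */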
8374 2315 : static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
8375 : struct inode *new_dir, struct dentry *new_dentry)
8376 : {
8377 1 : struct btrfs_trans_handle *trans;
8378 2315 : struct btrfs_root *root = BTRFS_I(old_dir)->root;
8379 2315 : struct btrfs_root *dest = BTRFS_I(new_dir)->root;
8380 2315 : struct inode *new_inode = new_dentry->d_inode;
8381 2315 : struct inode *old_inode = old_dentry->d_inode;
8382 2315 : struct timespec ctime = CURRENT_TIME;
8383 2315 : u64 index = 0;
8384 : u64 root_objectid;
8385 : int ret;
8386 : u64 old_ino = btrfs_ino(old_inode);
8387 :
8388 2315 : if (btrfs_ino(new_dir) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
8389 : return -EPERM;
8390 :
8391 : /* we only allow rename subvolume link between subvolumes */
8392 2315 : if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
8393 : return -EXDEV;
8394 :
8395 2312 : if (old_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID ||
8396 12 : (new_inode && btrfs_ino(new_inode) == BTRFS_FIRST_FREE_OBJECTID))
8397 : return -ENOTEMPTY;
8398 :
8399 2312 : if (S_ISDIR(old_inode->i_mode) && new_inode &&
8400 0 : new_inode->i_size > BTRFS_EMPTY_DIR_SIZE)
8401 : return -ENOTEMPTY;
8402 :
8403 :
8404 : /* check for collisions, even if the name isn't there */
8405 4624 : ret = btrfs_check_dir_item_collision(dest, new_dir->i_ino,
8406 2312 : new_dentry->d_name.name,
8407 2312 : new_dentry->d_name.len);
8408 :
8409 2312 : if (ret) {
8410 12 : if (ret == -EEXIST) {
8411 :			/* we shouldn't get
8412 :			 * -EEXIST without a new_inode */
8413 12 : if (WARN_ON(!new_inode)) {
8414 : return ret;
8415 : }
8416 : } else {
8417 : /* maybe -EOVERFLOW */
8418 : return ret;
8419 : }
8420 : }
8421 : ret = 0;
8422 :
8423 : /*
8424 : * we're using rename to replace one file with another. Start IO on it
8425 : * now so we don't add too much work to the end of the transaction
8426 : */
8427 2312 : if (new_inode && S_ISREG(old_inode->i_mode) && new_inode->i_size)
8428 12 : filemap_flush(old_inode->i_mapping);
8429 :
8430 : /* close the racy window with snapshot create/destroy ioctl */
8431 2312 : if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
8432 1 : down_read(&root->fs_info->subvol_sem);
8433 : /*
8434 : * We want to reserve the absolute worst case amount of items. So if
8435 :	 * We want to reserve the absolute worst case number of items. So if
8436 :	 * both inodes are subvols and we need to unlink them then that would
8437 :	 * require 4 item modifications, but if they are both normal inodes it
8438 :	 * would require 5 item modifications, so we'll assume they're normal
8439 : * should cover the worst case number of items we'll modify.
8440 : */
8441 2312 : trans = btrfs_start_transaction(root, 11);
8442 2312 : if (IS_ERR(trans)) {
8443 0 : ret = PTR_ERR(trans);
8444 0 : goto out_notrans;
8445 : }
8446 :
8447 2312 : if (dest != root)
8448 1 : btrfs_record_root_in_trans(trans, dest);
8449 :
8450 2312 : ret = btrfs_set_inode_index(new_dir, &index);
8451 2312 : if (ret)
8452 : goto out_fail;
8453 :
8454 2312 : BTRFS_I(old_inode)->dir_index = 0ULL;
8455 2312 : if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
8456 : /* force full log commit if subvolume involved. */
8457 1 : btrfs_set_log_full_commit(root->fs_info, trans);
8458 : } else {
8459 6933 : ret = btrfs_insert_inode_ref(trans, dest,
8460 2311 : new_dentry->d_name.name,
8461 2311 : new_dentry->d_name.len,
8462 : old_ino,
8463 : btrfs_ino(new_dir), index);
8464 2311 : if (ret)
8465 : goto out_fail;
8466 : /*
8467 : * this is an ugly little race, but the rename is required
8468 : * to make sure that if we crash, the inode is either at the
8469 : * old name or the new one. pinning the log transaction lets
8470 : * us make sure we don't allow a log commit to come in after
8471 : * we unlink the name but before we add the new name back in.
8472 : */
8473 2311 : btrfs_pin_log_trans(root);
8474 : }
8475 :
8476 : inode_inc_iversion(old_dir);
8477 : inode_inc_iversion(new_dir);
8478 : inode_inc_iversion(old_inode);
8479 2312 : old_dir->i_ctime = old_dir->i_mtime = ctime;
8480 2312 : new_dir->i_ctime = new_dir->i_mtime = ctime;
8481 2312 : old_inode->i_ctime = ctime;
8482 :
8483 2312 : if (old_dentry->d_parent != new_dentry->d_parent)
8484 2119 : btrfs_record_unlink_dir(trans, old_dir, old_inode, 1);
8485 :
8486 2312 : if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
8487 1 : root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
8488 2 : ret = btrfs_unlink_subvol(trans, root, old_dir, root_objectid,
8489 1 : old_dentry->d_name.name,
8490 1 : old_dentry->d_name.len);
8491 : } else {
8492 4622 : ret = __btrfs_unlink_inode(trans, root, old_dir,
8493 : old_dentry->d_inode,
8494 2311 : old_dentry->d_name.name,
8495 2311 : old_dentry->d_name.len);
8496 2311 : if (!ret)
8497 2311 : ret = btrfs_update_inode(trans, root, old_inode);
8498 : }
8499 2312 : if (ret) {
8500 0 : btrfs_abort_transaction(trans, root, ret);
8501 0 : goto out_fail;
8502 : }
8503 :
8504 2312 : if (new_inode) {
8505 : inode_inc_iversion(new_inode);
8506 12 : new_inode->i_ctime = CURRENT_TIME;
8507 12 : if (unlikely(btrfs_ino(new_inode) ==
8508 : BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
8509 : root_objectid = BTRFS_I(new_inode)->location.objectid;
8510 0 : ret = btrfs_unlink_subvol(trans, dest, new_dir,
8511 : root_objectid,
8512 0 : new_dentry->d_name.name,
8513 0 : new_dentry->d_name.len);
8514 0 : BUG_ON(new_inode->i_nlink == 0);
8515 : } else {
8516 24 : ret = btrfs_unlink_inode(trans, dest, new_dir,
8517 : new_dentry->d_inode,
8518 12 : new_dentry->d_name.name,
8519 12 : new_dentry->d_name.len);
8520 : }
8521 12 : if (!ret && new_inode->i_nlink == 0)
8522 11 : ret = btrfs_orphan_add(trans, new_dentry->d_inode);
8523 12 : if (ret) {
8524 0 : btrfs_abort_transaction(trans, root, ret);
8525 0 : goto out_fail;
8526 : }
8527 : }
8528 :
8529 6936 : ret = btrfs_add_link(trans, new_dir, old_inode,
8530 2312 : new_dentry->d_name.name,
8531 2312 : new_dentry->d_name.len, 0, index);
8532 2312 : if (ret) {
8533 0 : btrfs_abort_transaction(trans, root, ret);
8534 0 : goto out_fail;
8535 : }
8536 :
8537 2312 : if (old_inode->i_nlink == 1)
8538 1934 : BTRFS_I(old_inode)->dir_index = index;
8539 :
8540 2312 : if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
8541 2311 : struct dentry *parent = new_dentry->d_parent;
8542 2311 : btrfs_log_new_name(trans, old_inode, old_dir, parent);
8543 2311 : btrfs_end_log_trans(root);
8544 : }
8545 : out_fail:
8546 2312 : btrfs_end_transaction(trans, root);
8547 : out_notrans:
8548 2312 : if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
8549 1 : up_read(&root->fs_info->subvol_sem);
8550 :
8551 2312 : return ret;
8552 : }
8553 :
8554 2315 : static int btrfs_rename2(struct inode *old_dir, struct dentry *old_dentry,
8555 : struct inode *new_dir, struct dentry *new_dentry,
8556 : unsigned int flags)
8557 : {
8558 2315 : if (flags & ~RENAME_NOREPLACE)
8559 : return -EINVAL;
8560 :
8561 2315 : return btrfs_rename(old_dir, old_dentry, new_dir, new_dentry);
8562 : }
8563 :
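/*
 * Illustrative userspace sketch (not part of the kernel source): a
 * minimal caller exercising the RENAME_NOREPLACE flag accepted by
 * btrfs_rename2() above. The raw syscall is used on the assumption
 * that the C library headers define SYS_renameat2 but may lack a
 * renameat2() wrapper; the "old"/"new" paths are hypothetical.
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef RENAME_NOREPLACE
#define RENAME_NOREPLACE (1 << 0)
#endif

int main(void)
{
	/* Fails with EEXIST rather than replacing "new" if it exists. */
	if (syscall(SYS_renameat2, AT_FDCWD, "old", AT_FDCWD, "new",
		    RENAME_NOREPLACE) == -1)
		perror("renameat2");
	return 0;
}
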
8564 72 : static void btrfs_run_delalloc_work(struct btrfs_work *work)
8565 : {
8566 : struct btrfs_delalloc_work *delalloc_work;
8567 : struct inode *inode;
8568 :
8569 : delalloc_work = container_of(work, struct btrfs_delalloc_work,
8570 : work);
8571 72 : inode = delalloc_work->inode;
8572 72 : if (delalloc_work->wait) {
8573 0 : btrfs_wait_ordered_range(inode, 0, (u64)-1);
8574 : } else {
8575 72 : filemap_flush(inode->i_mapping);
8576 72 : if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
8577 : &BTRFS_I(inode)->runtime_flags))
8578 3 : filemap_flush(inode->i_mapping);
8579 : }
8580 :
8581 72 : if (delalloc_work->delay_iput)
8582 0 : btrfs_add_delayed_iput(inode);
8583 : else
8584 72 : iput(inode);
8585 72 : complete(&delalloc_work->completion);
8586 72 : }
8587 :
8588 72 : struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode,
8589 : int wait, int delay_iput)
8590 : {
8591 : struct btrfs_delalloc_work *work;
8592 :
8593 72 : work = kmem_cache_zalloc(btrfs_delalloc_work_cachep, GFP_NOFS);
8594 72 : if (!work)
8595 : return NULL;
8596 :
8597 : init_completion(&work->completion);
8598 72 : INIT_LIST_HEAD(&work->list);
8599 72 : work->inode = inode;
8600 72 : work->wait = wait;
8601 72 : work->delay_iput = delay_iput;
8602 72 : WARN_ON_ONCE(!inode);
8603 72 : btrfs_init_work(&work->work, btrfs_flush_delalloc_helper,
8604 : btrfs_run_delalloc_work, NULL, NULL);
8605 :
8606 72 : return work;
8607 : }
8608 :
8609 72 : void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work)
8610 : {
8611 72 : wait_for_completion(&work->completion);
8612 72 : kmem_cache_free(btrfs_delalloc_work_cachep, work);
8613 72 : }
8614 :
8615 : /*
8616 : * Some fairly slow code that needs optimization: walk the list of
8617 : * all inodes with pending delalloc and force their dirty data to disk.
8618 : */
8619 155 : static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput,
8620 : int nr)
8621 : {
8622 : struct btrfs_inode *binode;
8623 : struct inode *inode;
8624 : struct btrfs_delalloc_work *work, *next;
8625 : struct list_head works;
8626 : struct list_head splice;
8627 : int ret = 0;
8628 :
8629 : INIT_LIST_HEAD(&works);
8630 : INIT_LIST_HEAD(&splice);
8631 :
8632 155 : mutex_lock(&root->delalloc_mutex);
8633 : spin_lock(&root->delalloc_lock);
8634 155 : list_splice_init(&root->delalloc_inodes, &splice);
8635 227 : while (!list_empty(&splice)) {
8636 : binode = list_entry(splice.next, struct btrfs_inode,
8637 : delalloc_inodes);
8638 :
8639 72 : list_move_tail(&binode->delalloc_inodes,
8640 : &root->delalloc_inodes);
8641 72 : inode = igrab(&binode->vfs_inode);
8642 72 : if (!inode) {
8643 0 : cond_resched_lock(&root->delalloc_lock);
8644 0 : continue;
8645 : }
8646 : spin_unlock(&root->delalloc_lock);
8647 :
8648 72 : work = btrfs_alloc_delalloc_work(inode, 0, delay_iput);
8649 72 : if (unlikely(!work)) {
8650 0 : if (delay_iput)
8651 0 : btrfs_add_delayed_iput(inode);
8652 : else
8653 0 : iput(inode);
8654 : ret = -ENOMEM;
8655 : goto out;
8656 : }
8657 72 : list_add_tail(&work->list, &works);
8658 72 : btrfs_queue_work(root->fs_info->flush_workers,
8659 : &work->work);
8660 72 : ret++;
8661 72 : if (nr != -1 && ret >= nr)
8662 : goto out;
8663 72 : cond_resched();
8664 : spin_lock(&root->delalloc_lock);
8665 : }
8666 : spin_unlock(&root->delalloc_lock);
8667 :
8668 : out:
8669 227 : list_for_each_entry_safe(work, next, &works, list) {
8670 : list_del_init(&work->list);
8671 72 : btrfs_wait_and_free_delalloc_work(work);
8672 : }
8673 :
8674 155 : if (!list_empty_careful(&splice)) {
8675 : spin_lock(&root->delalloc_lock);
8676 : list_splice_tail(&splice, &root->delalloc_inodes);
8677 : spin_unlock(&root->delalloc_lock);
8678 : }
8679 155 : mutex_unlock(&root->delalloc_mutex);
8680 155 : return ret;
8681 : }
8682 :
8683 146 : int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
8684 : {
8685 : int ret;
8686 :
8687 292 : if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
8688 : return -EROFS;
8689 :
8690 146 : ret = __start_delalloc_inodes(root, delay_iput, -1);
8691 146 : if (ret > 0)
8692 : ret = 0;
8693 : /*
8694 : 	 * The filemap_flush() calls queue IO into the worker threads, but
8695 : 	 * we have to make sure the IO is actually started and that
8696 : 	 * ordered extents get created before we return.
8697 : */
8698 146 : atomic_inc(&root->fs_info->async_submit_draining);
8699 454 : while (atomic_read(&root->fs_info->nr_async_submits) ||
8700 : atomic_read(&root->fs_info->async_delalloc_pages)) {
8701 49 : wait_event(root->fs_info->async_submit_wait,
8702 : (atomic_read(&root->fs_info->nr_async_submits) == 0 &&
8703 : atomic_read(&root->fs_info->async_delalloc_pages) == 0));
8704 : }
8705 146 : atomic_dec(&root->fs_info->async_submit_draining);
8706 146 : return ret;
8707 : }
8708 :
8709 90 : int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput,
8710 : int nr)
8711 : {
8712 : struct btrfs_root *root;
8713 : struct list_head splice;
8714 : int ret;
8715 :
8716 90 : if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
8717 : return -EROFS;
8718 :
8719 : INIT_LIST_HEAD(&splice);
8720 :
8721 90 : mutex_lock(&fs_info->delalloc_root_mutex);
8722 : spin_lock(&fs_info->delalloc_root_lock);
8723 90 : list_splice_init(&fs_info->delalloc_roots, &splice);
8724 99 : while (!list_empty(&splice) && nr) {
8725 9 : root = list_first_entry(&splice, struct btrfs_root,
8726 : delalloc_root);
8727 9 : root = btrfs_grab_fs_root(root);
8728 9 : BUG_ON(!root);
8729 9 : list_move_tail(&root->delalloc_root,
8730 : &fs_info->delalloc_roots);
8731 : spin_unlock(&fs_info->delalloc_root_lock);
8732 :
8733 9 : ret = __start_delalloc_inodes(root, delay_iput, nr);
8734 9 : btrfs_put_fs_root(root);
8735 9 : if (ret < 0)
8736 : goto out;
8737 :
8738 9 : if (nr != -1) {
8739 0 : nr -= ret;
8740 0 : WARN_ON(nr < 0);
8741 : }
8742 : spin_lock(&fs_info->delalloc_root_lock);
8743 : }
8744 : spin_unlock(&fs_info->delalloc_root_lock);
8745 :
8746 : ret = 0;
8747 90 : atomic_inc(&fs_info->async_submit_draining);
8748 187 : while (atomic_read(&fs_info->nr_async_submits) ||
8749 : atomic_read(&fs_info->async_delalloc_pages)) {
8750 499 : wait_event(fs_info->async_submit_wait,
8751 : (atomic_read(&fs_info->nr_async_submits) == 0 &&
8752 : atomic_read(&fs_info->async_delalloc_pages) == 0));
8753 : }
8754 : atomic_dec(&fs_info->async_submit_draining);
8755 : out:
8756 90 : if (!list_empty_careful(&splice)) {
8757 : spin_lock(&fs_info->delalloc_root_lock);
8758 : list_splice_tail(&splice, &fs_info->delalloc_roots);
8759 : spin_unlock(&fs_info->delalloc_root_lock);
8760 : }
8761 90 : mutex_unlock(&fs_info->delalloc_root_mutex);
8762 90 : return ret;
8763 : }
8764 :
8765 4219 : static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
8766 : const char *symname)
8767 : {
8768 : struct btrfs_trans_handle *trans;
8769 2172 : struct btrfs_root *root = BTRFS_I(dir)->root;
8770 : struct btrfs_path *path;
8771 : struct btrfs_key key;
8772 : struct inode *inode = NULL;
8773 : int err;
8774 : int drop_inode = 0;
8775 : u64 objectid;
8776 2172 : u64 index = 0;
8777 : int name_len;
8778 : int datasize;
8779 : unsigned long ptr;
8780 : struct btrfs_file_extent_item *ei;
8781 : struct extent_buffer *leaf;
8782 :
8783 2172 : name_len = strlen(symname);
8784 2172 : if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
8785 : return -ENAMETOOLONG;
8786 :
8787 : /*
8788 : * 2 items for inode item and ref
8789 : * 2 items for dir items
8790 : * 1 item for xattr if selinux is on
8791 : */
8792 2047 : trans = btrfs_start_transaction(root, 5);
8793 2047 : if (IS_ERR(trans))
8794 0 : return PTR_ERR(trans);
8795 :
8796 2047 : err = btrfs_find_free_ino(root, &objectid);
8797 2047 : if (err)
8798 : goto out_unlock;
8799 :
8800 4094 : inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
8801 2047 : dentry->d_name.len, btrfs_ino(dir), objectid,
8802 : S_IFLNK|S_IRWXUGO, &index);
8803 2047 : if (IS_ERR(inode)) {
8804 0 : err = PTR_ERR(inode);
8805 0 : goto out_unlock;
8806 : }
8807 :
8808 : /*
8809 : * If the active LSM wants to access the inode during
8810 : * d_instantiate it needs these. Smack checks to see
8811 : * if the filesystem supports xattrs by looking at the
8812 : * ops vector.
8813 : */
8814 2047 : inode->i_fop = &btrfs_file_operations;
8815 2047 : inode->i_op = &btrfs_file_inode_operations;
8816 2047 : inode->i_mapping->a_ops = &btrfs_aops;
8817 2047 : inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
8818 2047 : BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
8819 :
8820 2047 : err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
8821 2047 : if (err)
8822 : goto out_unlock_inode;
8823 :
8824 2047 : err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
8825 2047 : if (err)
8826 : goto out_unlock_inode;
8827 :
8828 2047 : path = btrfs_alloc_path();
8829 2047 : if (!path) {
8830 : err = -ENOMEM;
8831 : goto out_unlock_inode;
8832 : }
8833 2047 : key.objectid = btrfs_ino(inode);
8834 2047 : key.offset = 0;
8835 : btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
8836 2047 : datasize = btrfs_file_extent_calc_inline_size(name_len);
8837 : err = btrfs_insert_empty_item(trans, root, path, &key,
8838 : datasize);
8839 2047 : if (err) {
8840 0 : btrfs_free_path(path);
8841 0 : goto out_unlock_inode;
8842 : }
8843 2047 : leaf = path->nodes[0];
8844 4094 : ei = btrfs_item_ptr(leaf, path->slots[0],
8845 : struct btrfs_file_extent_item);
8846 2047 : btrfs_set_file_extent_generation(leaf, ei, trans->transid);
8847 : btrfs_set_file_extent_type(leaf, ei,
8848 : BTRFS_FILE_EXTENT_INLINE);
8849 : btrfs_set_file_extent_encryption(leaf, ei, 0);
8850 : btrfs_set_file_extent_compression(leaf, ei, 0);
8851 : btrfs_set_file_extent_other_encoding(leaf, ei, 0);
8852 : btrfs_set_file_extent_ram_bytes(leaf, ei, name_len);
8853 :
8854 : ptr = btrfs_file_extent_inline_start(ei);
8855 2047 : write_extent_buffer(leaf, symname, ptr, name_len);
8856 2047 : btrfs_mark_buffer_dirty(leaf);
8857 2047 : btrfs_free_path(path);
8858 :
8859 2047 : inode->i_op = &btrfs_symlink_inode_operations;
8860 2047 : inode->i_mapping->a_ops = &btrfs_symlink_aops;
8861 2047 : inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
8862 2047 : inode_set_bytes(inode, name_len);
8863 : btrfs_i_size_write(inode, name_len);
8864 2047 : err = btrfs_update_inode(trans, root, inode);
8865 2047 : if (err) {
8866 : drop_inode = 1;
8867 : goto out_unlock_inode;
8868 : }
8869 :
8870 2047 : unlock_new_inode(inode);
8871 2047 : d_instantiate(dentry, inode);
8872 :
8873 : out_unlock:
8874 2047 : btrfs_end_transaction(trans, root);
8875 2047 : if (drop_inode) {
8876 : inode_dec_link_count(inode);
8877 0 : iput(inode);
8878 : }
8879 2047 : btrfs_btree_balance_dirty(root);
8880 2047 : return err;
8881 :
8882 : out_unlock_inode:
8883 : drop_inode = 1;
8884 0 : unlock_new_inode(inode);
8885 0 : goto out_unlock;
8886 : }
8887 :
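/*
 * Illustrative userspace sketch (not part of the kernel source):
 * btrfs_symlink() above stores the target as a single inline file
 * extent, so targets longer than BTRFS_MAX_INLINE_DATA_SIZE(root) are
 * rejected with ENAMETOOLONG. On a filesystem with small (4K) leaves
 * that limit sits below PATH_MAX, which this probe can observe; the
 * mount point is hypothetical.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	char target[4095];

	memset(target, 'x', sizeof(target) - 1);
	target[sizeof(target) - 1] = '\0';

	/* May fail with ENAMETOOLONG on btrfs even though the target
	 * is still shorter than PATH_MAX. */
	if (symlink(target, "/mnt/btrfs/longlink") == -1)
		perror("symlink");
	return 0;
}
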
8888 7582 : static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
8889 : u64 start, u64 num_bytes, u64 min_size,
8890 : loff_t actual_len, u64 *alloc_hint,
8891 : struct btrfs_trans_handle *trans)
8892 : {
8893 7582 : struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
8894 : struct extent_map *em;
8895 7582 : struct btrfs_root *root = BTRFS_I(inode)->root;
8896 : struct btrfs_key ins;
8897 : u64 cur_offset = start;
8898 : u64 i_size;
8899 : u64 cur_bytes;
8900 : int ret = 0;
8901 : bool own_trans = true;
8902 :
8903 7582 : if (trans)
8904 : own_trans = false;
8905 15164 : while (num_bytes > 0) {
8906 7582 : if (own_trans) {
8907 3607 : trans = btrfs_start_transaction(root, 3);
8908 3607 : if (IS_ERR(trans)) {
8909 0 : ret = PTR_ERR(trans);
8910 0 : break;
8911 : }
8912 : }
8913 :
8914 7582 : cur_bytes = min(num_bytes, 256ULL * 1024 * 1024);
8915 7582 : cur_bytes = max(cur_bytes, min_size);
8916 7582 : ret = btrfs_reserve_extent(root, cur_bytes, min_size, 0,
8917 : *alloc_hint, &ins, 1, 0);
8918 7582 : if (ret) {
8919 0 : if (own_trans)
8920 0 : btrfs_end_transaction(trans, root);
8921 : break;
8922 : }
8923 :
8924 7582 : ret = insert_reserved_file_extent(trans, inode,
8925 : cur_offset, ins.objectid,
8926 : ins.offset, ins.offset,
8927 : ins.offset, 0, 0, 0,
8928 : BTRFS_FILE_EXTENT_PREALLOC);
8929 7582 : if (ret) {
8930 0 : btrfs_free_reserved_extent(root, ins.objectid,
8931 : ins.offset, 0);
8932 0 : btrfs_abort_transaction(trans, root, ret);
8933 0 : if (own_trans)
8934 0 : btrfs_end_transaction(trans, root);
8935 : break;
8936 : }
8937 7582 : btrfs_drop_extent_cache(inode, cur_offset,
8938 7582 : 					cur_offset + ins.offset - 1, 0);
8939 :
8940 7582 : em = alloc_extent_map();
8941 7582 : if (!em) {
8942 : set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
8943 : &BTRFS_I(inode)->runtime_flags);
8944 : goto next;
8945 : }
8946 :
8947 7582 : em->start = cur_offset;
8948 7582 : em->orig_start = cur_offset;
8949 7582 : em->len = ins.offset;
8950 7582 : em->block_start = ins.objectid;
8951 7582 : em->block_len = ins.offset;
8952 7582 : em->orig_block_len = ins.offset;
8953 7582 : em->ram_bytes = ins.offset;
8954 7582 : em->bdev = root->fs_info->fs_devices->latest_bdev;
8955 : set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
8956 7582 : em->generation = trans->transid;
8957 :
8958 : while (1) {
8959 7582 : write_lock(&em_tree->lock);
8960 7582 : ret = add_extent_mapping(em_tree, em, 1);
8961 : write_unlock(&em_tree->lock);
8962 7582 : if (ret != -EEXIST)
8963 : break;
8964 0 : btrfs_drop_extent_cache(inode, cur_offset,
8965 0 : cur_offset + ins.offset - 1,
8966 : 0);
8967 0 : }
8968 7582 : free_extent_map(em);
8969 : next:
8970 7582 : num_bytes -= ins.offset;
8971 7582 : cur_offset += ins.offset;
8972 7582 : *alloc_hint = ins.objectid + ins.offset;
8973 :
8974 : inode_inc_iversion(inode);
8975 7582 : inode->i_ctime = CURRENT_TIME;
8976 7582 : BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
8977 13012 : if (!(mode & FALLOC_FL_KEEP_SIZE) &&
8978 10795 : (actual_len > inode->i_size) &&
8979 5365 : (cur_offset > inode->i_size)) {
8980 5309 : if (cur_offset > actual_len)
8981 : i_size = actual_len;
8982 : else
8983 : i_size = cur_offset;
8984 5309 : i_size_write(inode, i_size);
8985 5309 : btrfs_ordered_update_i_size(inode, i_size, NULL);
8986 : }
8987 :
8988 7582 : ret = btrfs_update_inode(trans, root, inode);
8989 :
8990 7582 : if (ret) {
8991 0 : btrfs_abort_transaction(trans, root, ret);
8992 0 : if (own_trans)
8993 0 : btrfs_end_transaction(trans, root);
8994 : break;
8995 : }
8996 :
8997 7582 : if (own_trans)
8998 3607 : btrfs_end_transaction(trans, root);
8999 : }
9000 7582 : return ret;
9001 : }
9002 :
9003 3607 : int btrfs_prealloc_file_range(struct inode *inode, int mode,
9004 : u64 start, u64 num_bytes, u64 min_size,
9005 : loff_t actual_len, u64 *alloc_hint)
9006 : {
9007 3607 : return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
9008 : min_size, actual_len, alloc_hint,
9009 : NULL);
9010 : }
9011 :
9012 3975 : int btrfs_prealloc_file_range_trans(struct inode *inode,
9013 : struct btrfs_trans_handle *trans, int mode,
9014 : u64 start, u64 num_bytes, u64 min_size,
9015 : loff_t actual_len, u64 *alloc_hint)
9016 : {
9017 3975 : return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
9018 : min_size, actual_len, alloc_hint, trans);
9019 : }
9020 :
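/*
 * Illustrative userspace sketch (not part of the kernel source):
 * fallocate(2) is what ultimately drives the preallocation loop in
 * __btrfs_prealloc_file_range() above. With FALLOC_FL_KEEP_SIZE the
 * i_size update branch there is skipped, so space is reserved as
 * BTRFS_FILE_EXTENT_PREALLOC extents while st_size stays 0. The path
 * is hypothetical.
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <linux/falloc.h>

int main(void)
{
	int fd = open("/mnt/btrfs/prealloc", O_CREAT | O_WRONLY, 0644);

	if (fd < 0)
		return 1;
	/* Reserve 1MiB without changing the visible file size. */
	if (fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 1 << 20) == -1)
		perror("fallocate");
	close(fd);
	return 0;
}
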
9021 1363190 : static int btrfs_set_page_dirty(struct page *page)
9022 : {
9023 1363190 : return __set_page_dirty_nobuffers(page);
9024 : }
9025 :
9026 6492852 : static int btrfs_permission(struct inode *inode, int mask)
9027 : {
9028 6578392 : struct btrfs_root *root = BTRFS_I(inode)->root;
9029 6492852 : umode_t mode = inode->i_mode;
9030 :
9031 6578696 : if (mask & MAY_WRITE &&
9032 86147 : (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) {
9033 85540 : if (btrfs_root_readonly(root))
9034 : return -EROFS;
9035 85539 : if (BTRFS_I(inode)->flags & BTRFS_INODE_READONLY)
9036 : return -EACCES;
9037 : }
9038 6492848 : return generic_permission(inode, mask);
9039 : }
9040 :
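/*
 * Illustrative userspace sketch (not part of the kernel source):
 * btrfs_permission() above is why opening a file for writing inside a
 * read-only subvolume (e.g. a snapshot created with -r) fails with
 * EROFS even when the mount itself is read-write. The path is
 * hypothetical.
 */
#include <stdio.h>
#include <fcntl.h>

int main(void)
{
	if (open("/mnt/btrfs/ro-snap/file", O_WRONLY) == -1)
		perror("open");	/* EROFS inside a read-only subvolume */
	return 0;
}
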
9041 2 : static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
9042 : {
9043 : struct btrfs_trans_handle *trans;
9044 2 : struct btrfs_root *root = BTRFS_I(dir)->root;
9045 : struct inode *inode = NULL;
9046 : u64 objectid;
9047 : u64 index;
9048 : int ret = 0;
9049 :
9050 : /*
9051 : 	 * 5 units are required for adding an orphan entry.
9052 : */
9053 2 : trans = btrfs_start_transaction(root, 5);
9054 2 : if (IS_ERR(trans))
9055 0 : return PTR_ERR(trans);
9056 :
9057 2 : ret = btrfs_find_free_ino(root, &objectid);
9058 2 : if (ret)
9059 : goto out;
9060 :
9061 4 : inode = btrfs_new_inode(trans, root, dir, NULL, 0,
9062 : btrfs_ino(dir), objectid, mode, &index);
9063 2 : if (IS_ERR(inode)) {
9064 0 : ret = PTR_ERR(inode);
9065 : inode = NULL;
9066 0 : goto out;
9067 : }
9068 :
9069 2 : inode->i_fop = &btrfs_file_operations;
9070 2 : inode->i_op = &btrfs_file_inode_operations;
9071 :
9072 2 : inode->i_mapping->a_ops = &btrfs_aops;
9073 2 : inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
9074 2 : BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
9075 :
9076 2 : ret = btrfs_init_inode_security(trans, inode, dir, NULL);
9077 2 : if (ret)
9078 : goto out_inode;
9079 :
9080 2 : ret = btrfs_update_inode(trans, root, inode);
9081 2 : if (ret)
9082 : goto out_inode;
9083 2 : ret = btrfs_orphan_add(trans, inode);
9084 2 : if (ret)
9085 : goto out_inode;
9086 :
9087 : /*
9088 : 	 * We set the number of links to 0 in btrfs_new_inode(), and here we
9089 : 	 * set it to 1 because d_tmpfile() decrements the count and a warning
9090 : 	 * would be issued if it were already 0, through:
9091 : *
9092 : * d_tmpfile() -> inode_dec_link_count() -> drop_nlink()
9093 : */
9094 2 : set_nlink(inode, 1);
9095 2 : unlock_new_inode(inode);
9096 2 : d_tmpfile(dentry, inode);
9097 : mark_inode_dirty(inode);
9098 :
9099 : out:
9100 2 : btrfs_end_transaction(trans, root);
9101 2 : if (ret)
9102 0 : iput(inode);
9103 2 : btrfs_balance_delayed_items(root);
9104 2 : btrfs_btree_balance_dirty(root);
9105 2 : return ret;
9106 :
9107 : out_inode:
9108 0 : unlock_new_inode(inode);
9109 0 : goto out;
9110 :
9111 : }
9112 :
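/*
 * Illustrative userspace sketch (not part of the kernel source): the
 * O_TMPFILE path that reaches btrfs_tmpfile() above. The inode starts
 * out unlinked (with an orphan item so a crash reclaims it), and
 * linkat() via /proc/self/fd can give it a name afterwards. Paths are
 * hypothetical.
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	char proc_path[64];
	int fd = open("/mnt/btrfs", O_TMPFILE | O_RDWR, 0600);

	if (fd < 0)
		return 1;
	/* The file is fully usable but has no name yet. */
	write(fd, "hello\n", 6);

	/* Optionally materialize it in the namespace. */
	snprintf(proc_path, sizeof(proc_path), "/proc/self/fd/%d", fd);
	if (linkat(AT_FDCWD, proc_path, AT_FDCWD, "/mnt/btrfs/visible",
		   AT_SYMLINK_FOLLOW) == -1)
		perror("linkat");
	close(fd);
	return 0;
}
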
9113 : static const struct inode_operations btrfs_dir_inode_operations = {
9114 : .getattr = btrfs_getattr,
9115 : .lookup = btrfs_lookup,
9116 : .create = btrfs_create,
9117 : .unlink = btrfs_unlink,
9118 : .link = btrfs_link,
9119 : .mkdir = btrfs_mkdir,
9120 : .rmdir = btrfs_rmdir,
9121 : .rename2 = btrfs_rename2,
9122 : .symlink = btrfs_symlink,
9123 : .setattr = btrfs_setattr,
9124 : .mknod = btrfs_mknod,
9125 : .setxattr = btrfs_setxattr,
9126 : .getxattr = btrfs_getxattr,
9127 : .listxattr = btrfs_listxattr,
9128 : .removexattr = btrfs_removexattr,
9129 : .permission = btrfs_permission,
9130 : .get_acl = btrfs_get_acl,
9131 : .set_acl = btrfs_set_acl,
9132 : .update_time = btrfs_update_time,
9133 : .tmpfile = btrfs_tmpfile,
9134 : };
9135 : static const struct inode_operations btrfs_dir_ro_inode_operations = {
9136 : .lookup = btrfs_lookup,
9137 : .permission = btrfs_permission,
9138 : .get_acl = btrfs_get_acl,
9139 : .set_acl = btrfs_set_acl,
9140 : .update_time = btrfs_update_time,
9141 : };
9142 :
9143 : static const struct file_operations btrfs_dir_file_operations = {
9144 : .llseek = generic_file_llseek,
9145 : .read = generic_read_dir,
9146 : .iterate = btrfs_real_readdir,
9147 : .unlocked_ioctl = btrfs_ioctl,
9148 : #ifdef CONFIG_COMPAT
9149 : .compat_ioctl = btrfs_ioctl,
9150 : #endif
9151 : .release = btrfs_release_file,
9152 : .fsync = btrfs_sync_file,
9153 : };
9154 :
9155 : static struct extent_io_ops btrfs_extent_io_ops = {
9156 : .fill_delalloc = run_delalloc_range,
9157 : .submit_bio_hook = btrfs_submit_bio_hook,
9158 : .merge_bio_hook = btrfs_merge_bio_hook,
9159 : .readpage_end_io_hook = btrfs_readpage_end_io_hook,
9160 : .writepage_end_io_hook = btrfs_writepage_end_io_hook,
9161 : .writepage_start_hook = btrfs_writepage_start_hook,
9162 : .set_bit_hook = btrfs_set_bit_hook,
9163 : .clear_bit_hook = btrfs_clear_bit_hook,
9164 : .merge_extent_hook = btrfs_merge_extent_hook,
9165 : .split_extent_hook = btrfs_split_extent_hook,
9166 : };
9167 :
9168 : /*
9169 : * btrfs doesn't support the bmap operation because swapfiles use
9170 : * bmap to build a mapping of the extents in a file. They assume
9171 : * these extents won't change over the life of the file and use the
9172 : * bmap result to do IO directly to the drive.
9173 : *
9174 : * A btrfs bmap call would return logical addresses that aren't
9175 : * suitable for IO, and those addresses also change frequently as
9176 : * COW operations happen. So, swapfile + btrfs == corruption.
9177 : *
9178 : * For now we avoid this by not implementing bmap.
9179 : */
9180 : static const struct address_space_operations btrfs_aops = {
9181 : .readpage = btrfs_readpage,
9182 : .writepage = btrfs_writepage,
9183 : .writepages = btrfs_writepages,
9184 : .readpages = btrfs_readpages,
9185 : .direct_IO = btrfs_direct_IO,
9186 : .invalidatepage = btrfs_invalidatepage,
9187 : .releasepage = btrfs_releasepage,
9188 : .set_page_dirty = btrfs_set_page_dirty,
9189 : .error_remove_page = generic_error_remove_page,
9190 : };
9191 :
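/*
 * Illustrative userspace sketch (not part of the kernel source): the
 * missing .bmap entry in btrfs_aops above is observable from
 * userspace, where the FIBMAP ioctl fails with EINVAL instead of
 * returning a physical block number. FIBMAP requires CAP_SYS_RAWIO;
 * the file path is hypothetical.
 */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>

int main(void)
{
	int block = 0;	/* logical block in, physical block out */
	int fd = open("/mnt/btrfs/somefile", O_RDONLY);

	if (fd < 0)
		return 1;
	if (ioctl(fd, FIBMAP, &block) == -1)
		perror("FIBMAP");	/* EINVAL on btrfs */
	else
		printf("block 0 -> %d\n", block);
	close(fd);
	return 0;
}
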
9192 : static const struct address_space_operations btrfs_symlink_aops = {
9193 : .readpage = btrfs_readpage,
9194 : .writepage = btrfs_writepage,
9195 : .invalidatepage = btrfs_invalidatepage,
9196 : .releasepage = btrfs_releasepage,
9197 : };
9198 :
9199 : static const struct inode_operations btrfs_file_inode_operations = {
9200 : .getattr = btrfs_getattr,
9201 : .setattr = btrfs_setattr,
9202 : .setxattr = btrfs_setxattr,
9203 : .getxattr = btrfs_getxattr,
9204 : .listxattr = btrfs_listxattr,
9205 : .removexattr = btrfs_removexattr,
9206 : .permission = btrfs_permission,
9207 : .fiemap = btrfs_fiemap,
9208 : .get_acl = btrfs_get_acl,
9209 : .set_acl = btrfs_set_acl,
9210 : .update_time = btrfs_update_time,
9211 : };
9212 : static const struct inode_operations btrfs_special_inode_operations = {
9213 : .getattr = btrfs_getattr,
9214 : .setattr = btrfs_setattr,
9215 : .permission = btrfs_permission,
9216 : .setxattr = btrfs_setxattr,
9217 : .getxattr = btrfs_getxattr,
9218 : .listxattr = btrfs_listxattr,
9219 : .removexattr = btrfs_removexattr,
9220 : .get_acl = btrfs_get_acl,
9221 : .set_acl = btrfs_set_acl,
9222 : .update_time = btrfs_update_time,
9223 : };
9224 : static const struct inode_operations btrfs_symlink_inode_operations = {
9225 : .readlink = generic_readlink,
9226 : .follow_link = page_follow_link_light,
9227 : .put_link = page_put_link,
9228 : .getattr = btrfs_getattr,
9229 : .setattr = btrfs_setattr,
9230 : .permission = btrfs_permission,
9231 : .setxattr = btrfs_setxattr,
9232 : .getxattr = btrfs_getxattr,
9233 : .listxattr = btrfs_listxattr,
9234 : .removexattr = btrfs_removexattr,
9235 : .update_time = btrfs_update_time,
9236 : };
9237 :
9238 : const struct dentry_operations btrfs_dentry_operations = {
9239 : .d_delete = btrfs_dentry_delete,
9240 : .d_release = btrfs_dentry_release,
9241 : };
|