LCOV - code coverage report
Current view: top level - fs/btrfs - file.c (source / functions)
Test:         btrfstest.info
Date:         2014-11-28

                   Hit     Total   Coverage
Lines:             835      1109     75.3 %
Functions:          28        35     80.0 %

          Line data    Source code
       1             : /*
       2             :  * Copyright (C) 2007 Oracle.  All rights reserved.
       3             :  *
       4             :  * This program is free software; you can redistribute it and/or
       5             :  * modify it under the terms of the GNU General Public
       6             :  * License v2 as published by the Free Software Foundation.
       7             :  *
       8             :  * This program is distributed in the hope that it will be useful,
       9             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      10             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      11             :  * General Public License for more details.
      12             :  *
      13             :  * You should have received a copy of the GNU General Public
      14             :  * License along with this program; if not, write to the
      15             :  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
      16             :  * Boston, MA 02111-1307, USA.
      17             :  */
      18             : 
      19             : #include <linux/fs.h>
      20             : #include <linux/pagemap.h>
      21             : #include <linux/highmem.h>
      22             : #include <linux/time.h>
      23             : #include <linux/init.h>
      24             : #include <linux/string.h>
      25             : #include <linux/backing-dev.h>
      26             : #include <linux/mpage.h>
      27             : #include <linux/aio.h>
      28             : #include <linux/falloc.h>
      29             : #include <linux/swap.h>
      30             : #include <linux/writeback.h>
      31             : #include <linux/statfs.h>
      32             : #include <linux/compat.h>
      33             : #include <linux/slab.h>
      34             : #include <linux/btrfs.h>
      35             : #include "ctree.h"
      36             : #include "disk-io.h"
      37             : #include "transaction.h"
      38             : #include "btrfs_inode.h"
      39             : #include "print-tree.h"
      40             : #include "tree-log.h"
      41             : #include "locking.h"
      42             : #include "volumes.h"
      43             : #include "qgroup.h"
      44             : 
      45             : static struct kmem_cache *btrfs_inode_defrag_cachep;
      46             : /*
      47             :  * When auto defrag is enabled, we queue up these
      48             :  * defrag structs to remember which inodes need
      49             :  * defragging passes.
      50             :  */
      51             : struct inode_defrag {
      52             :         struct rb_node rb_node;
      53             :         /* objectid */
      54             :         u64 ino;
      55             :         /*
      56             :          * transid at which the defrag was added; we search
      57             :          * for extents newer than this
      58             :          */
      59             :         u64 transid;
      60             : 
      61             :         /* root objectid */
      62             :         u64 root;
      63             : 
      64             :         /* last offset we were able to defrag */
      65             :         u64 last_offset;
      66             : 
      67             :         /* if we've wrapped around back to zero once already */
      68             :         int cycled;
      69             : };
      70             : 
      71             : static int __compare_inode_defrag(struct inode_defrag *defrag1,
      72             :                                   struct inode_defrag *defrag2)
      73             : {
      74           0 :         if (defrag1->root > defrag2->root)
      75             :                 return 1;
      76           0 :         else if (defrag1->root < defrag2->root)
      77             :                 return -1;
      78           0 :         else if (defrag1->ino > defrag2->ino)
      79             :                 return 1;
      80           0 :         else if (defrag1->ino < defrag2->ino)
      81             :                 return -1;
      82             :         else
      83             :                 return 0;
      84             : }
      85             : 
      86             : /* insert a record for an inode into the defrag tree.  The lock
      87             :  * must already be held
      88             :  *
      89             :  * If you're inserting a record for an older transid than an
      90             :  * existing record, the transid already in the tree is lowered
      91             :  *
      92             :  * If an existing record is found, the defrag item you
      93             :  * pass in is freed
      94             :  */
      95           0 : static int __btrfs_add_inode_defrag(struct inode *inode,
      96           0 :                                     struct inode_defrag *defrag)
      97             : {
      98           0 :         struct btrfs_root *root = BTRFS_I(inode)->root;
      99           0 :         struct inode_defrag *entry;
     100             :         struct rb_node **p;
     101             :         struct rb_node *parent = NULL;
     102             :         int ret;
     103             : 
     104           0 :         p = &root->fs_info->defrag_inodes.rb_node;
     105           0 :         while (*p) {
     106             :                 parent = *p;
     107             :                 entry = rb_entry(parent, struct inode_defrag, rb_node);
     108             : 
     109             :                 ret = __compare_inode_defrag(defrag, entry);
     110           0 :                 if (ret < 0)
     111           0 :                         p = &parent->rb_left;
     112           0 :                 else if (ret > 0)
     113           0 :                         p = &parent->rb_right;
     114             :                 else {
     115             :                         /* if we're reinserting an entry for
     116             :                          * an old defrag run, make sure to
     117             :                          * lower the transid of our existing record
     118             :                          */
     119           0 :                         if (defrag->transid < entry->transid)
     120           0 :                                 entry->transid = defrag->transid;
     121           0 :                         if (defrag->last_offset > entry->last_offset)
     122           0 :                                 entry->last_offset = defrag->last_offset;
     123             :                         return -EEXIST;
     124             :                 }
     125             :         }
     126             :         set_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
     127           0 :         rb_link_node(&defrag->rb_node, parent, p);
     128           0 :         rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes);
     129           0 :         return 0;
     130             : }
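
The interesting branch above is the duplicate case: when a record for the
same (root, ino) pair is already queued, the new record is not linked into
the tree; the existing node is widened instead (its transid lowered, its
last_offset raised) and -EEXIST is returned so the caller can free the
allocation it passed in.  A minimal userspace sketch of that merge rule,
using a flat table in place of the kernel rbtree (struct defrag_rec,
add_defrag() and the table are illustrative stand-ins, not kernel APIs):

    #include <errno.h>
    #include <stdint.h>
    #include <stdio.h>

    /* simplified stand-in for struct inode_defrag */
    struct defrag_rec {
            uint64_t root;
            uint64_t ino;
            uint64_t transid;
            uint64_t last_offset;
    };

    /* same ordering as __compare_inode_defrag(): root first, then ino */
    static int cmp_rec(const struct defrag_rec *a, const struct defrag_rec *b)
    {
            if (a->root != b->root)
                    return a->root > b->root ? 1 : -1;
            if (a->ino != b->ino)
                    return a->ino > b->ino ? 1 : -1;
            return 0;
    }

    /* a tiny table instead of an rbtree; enough to show the merge rule */
    static struct defrag_rec table[16];
    static int nr;

    static int add_defrag(const struct defrag_rec *new)
    {
            for (int i = 0; i < nr; i++) {
                    if (cmp_rec(new, &table[i]) != 0)
                            continue;
                    /* duplicate key: widen the record already queued */
                    if (new->transid < table[i].transid)
                            table[i].transid = new->transid;
                    if (new->last_offset > table[i].last_offset)
                            table[i].last_offset = new->last_offset;
                    return -EEXIST;         /* caller frees its own copy */
            }
            table[nr++] = *new;
            return 0;
    }

    int main(void)
    {
            struct defrag_rec a = { .root = 5, .ino = 260, .transid = 100 };
            struct defrag_rec b = { .root = 5, .ino = 260, .transid = 90,
                                    .last_offset = 4096 };

            printf("first insert:  %d\n", add_defrag(&a));     /* 0 */
            printf("second insert: %d\n", add_defrag(&b));     /* -EEXIST */
            printf("merged transid=%llu last_offset=%llu\n",
                   (unsigned long long)table[0].transid,
                   (unsigned long long)table[0].last_offset);  /* 90 4096 */
            return 0;
    }
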
     131             : 
     132             : static inline int __need_auto_defrag(struct btrfs_root *root)
     133             : {
     134       12202 :         if (!btrfs_test_opt(root, AUTO_DEFRAG))
     135             :                 return 0;
     136             : 
     137           0 :         if (btrfs_fs_closing(root->fs_info))
     138             :                 return 0;
     139             : 
     140             :         return 1;
     141             : }
     142             : 
     143             : /*
     144             :  * insert a defrag record for this inode if auto defrag is
     145             :  * enabled
     146             :  */
     147       11907 : int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
     148             :                            struct inode *inode)
     149             : {
     150       11907 :         struct btrfs_root *root = BTRFS_I(inode)->root;
     151             :         struct inode_defrag *defrag;
     152             :         u64 transid;
     153             :         int ret;
     154             : 
     155       11907 :         if (!__need_auto_defrag(root))
     156             :                 return 0;
     157             : 
     158           0 :         if (test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags))
     159             :                 return 0;
     160             : 
     161           0 :         if (trans)
     162           0 :                 transid = trans->transid;
     163             :         else
     164           0 :                 transid = BTRFS_I(inode)->root->last_trans;
     165             : 
     166           0 :         defrag = kmem_cache_zalloc(btrfs_inode_defrag_cachep, GFP_NOFS);
     167           0 :         if (!defrag)
     168             :                 return -ENOMEM;
     169             : 
     170           0 :         defrag->ino = btrfs_ino(inode);
     171           0 :         defrag->transid = transid;
     172           0 :         defrag->root = root->root_key.objectid;
     173             : 
     174           0 :         spin_lock(&root->fs_info->defrag_inodes_lock);
     175           0 :         if (!test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags)) {
     176             :                 /*
     177             :                  * If we set the IN_DEFRAG flag and evict the inode from memory,
     178             :                  * and then re-read this inode, the new inode doesn't have the
     179             :                  * IN_DEFRAG flag. In that case, we may find an existing defrag record.
     180             :                  */
     181           0 :                 ret = __btrfs_add_inode_defrag(inode, defrag);
     182           0 :                 if (ret)
     183           0 :                         kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
     184             :         } else {
     185           0 :                 kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
     186             :         }
     187           0 :         spin_unlock(&root->fs_info->defrag_inodes_lock);
     188           0 :         return 0;
     189             : }
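
btrfs_add_inode_defrag() follows a common "allocate optimistically, recheck
under the lock" shape: the IN_DEFRAG bit is tested without the lock, the
record is allocated outside the lock, and the state is tested again with
defrag_inodes_lock held, so a racing inserter's record wins and ours is
freed.  A rough pthread model of that shape (it collapses the separate
IN_DEFRAG bit and the tree lookup into a single flag; queue_once() and
queued are illustrative names, not kernel APIs):

    #include <errno.h>
    #include <pthread.h>
    #include <stdbool.h>
    #include <stdlib.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static bool queued;             /* stands in for the IN_DEFRAG flag */
    static void *queued_item;       /* stands in for the queued record */

    static int queue_once(void)
    {
            void *item;

            if (queued)             /* cheap, unlocked early-out */
                    return 0;

            item = malloc(64);      /* allocate before taking the lock */
            if (!item)
                    return -ENOMEM;

            pthread_mutex_lock(&lock);
            if (!queued) {
                    queued = true;          /* we won: publish our record */
                    queued_item = item;
                    item = NULL;
            }
            pthread_mutex_unlock(&lock);

            free(item);             /* lost the race: drop the spare copy */
            return 0;
    }

    int main(void)
    {
            queue_once();
            queue_once();           /* no-op: already queued */
            free(queued_item);
            return 0;
    }
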
     190             : 
     191             : /*
     192             :  * Requeue the defrag object. If there is a defrag object that points to
     193             :  * the same inode in the tree, we will merge them together (by
     194             :  * __btrfs_add_inode_defrag()) and free the one that we want to requeue.
     195             :  */
     196           0 : static void btrfs_requeue_inode_defrag(struct inode *inode,
     197             :                                        struct inode_defrag *defrag)
     198             : {
     199           0 :         struct btrfs_root *root = BTRFS_I(inode)->root;
     200             :         int ret;
     201             : 
     202           0 :         if (!__need_auto_defrag(root))
     203             :                 goto out;
     204             : 
     205             :         /*
     206             :          * Here we don't check the IN_DEFRAG flag, because we need to
     207             :          * merge them together.
     208             :          */
     209           0 :         spin_lock(&root->fs_info->defrag_inodes_lock);
     210           0 :         ret = __btrfs_add_inode_defrag(inode, defrag);
     211           0 :         spin_unlock(&root->fs_info->defrag_inodes_lock);
     212           0 :         if (ret)
     213             :                 goto out;
     214           0 :         return;
     215             : out:
     216           0 :         kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
     217             : }
     218             : 
     219             : /*
     220             :  * pick the defraggable inode that we want; if it doesn't exist, we will get
     221             :  * the next one.
     222             :  */
     223             : static struct inode_defrag *
     224           0 : btrfs_pick_defrag_inode(struct btrfs_fs_info *fs_info, u64 root, u64 ino)
     225             : {
     226           0 :         struct inode_defrag *entry = NULL;
     227             :         struct inode_defrag tmp;
     228             :         struct rb_node *p;
     229             :         struct rb_node *parent = NULL;
     230             :         int ret;
     231             : 
     232             :         tmp.ino = ino;
     233             :         tmp.root = root;
     234             : 
     235             :         spin_lock(&fs_info->defrag_inodes_lock);
     236           0 :         p = fs_info->defrag_inodes.rb_node;
     237           0 :         while (p) {
     238             :                 parent = p;
     239             :                 entry = rb_entry(parent, struct inode_defrag, rb_node);
     240             : 
     241             :                 ret = __compare_inode_defrag(&tmp, entry);
     242           0 :                 if (ret < 0)
     243           0 :                         p = parent->rb_left;
     244           0 :                 else if (ret > 0)
     245           0 :                         p = parent->rb_right;
     246             :                 else
     247             :                         goto out;
     248             :         }
     249             : 
     250           0 :         if (parent && __compare_inode_defrag(&tmp, entry) > 0) {
     251           0 :                 parent = rb_next(parent);
     252           0 :                 if (parent)
     253             :                         entry = rb_entry(parent, struct inode_defrag, rb_node);
     254             :                 else
     255             :                         entry = NULL;
     256             :         }
     257             : out:
     258           0 :         if (entry)
     259           0 :                 rb_erase(parent, &fs_info->defrag_inodes);
     260             :         spin_unlock(&fs_info->defrag_inodes_lock);
     261           0 :         return entry;
     262             : }
     263             : 
     264         221 : void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info)
     265             : {
     266             :         struct inode_defrag *defrag;
     267             :         struct rb_node *node;
     268             : 
     269             :         spin_lock(&fs_info->defrag_inodes_lock);
     270         221 :         node = rb_first(&fs_info->defrag_inodes);
     271         442 :         while (node) {
     272           0 :                 rb_erase(node, &fs_info->defrag_inodes);
     273             :                 defrag = rb_entry(node, struct inode_defrag, rb_node);
     274           0 :                 kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
     275             : 
     276           0 :                 if (need_resched()) {
     277             :                         spin_unlock(&fs_info->defrag_inodes_lock);
     278           0 :                         cond_resched();
     279             :                         spin_lock(&fs_info->defrag_inodes_lock);
     280             :                 }
     281             : 
     282           0 :                 node = rb_first(&fs_info->defrag_inodes);
     283             :         }
     284             :         spin_unlock(&fs_info->defrag_inodes_lock);
     285         221 : }
     286             : 
     287             : #define BTRFS_DEFRAG_BATCH      1024
     288             : 
     289           0 : static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
     290             :                                     struct inode_defrag *defrag)
     291             : {
     292             :         struct btrfs_root *inode_root;
     293             :         struct inode *inode;
     294             :         struct btrfs_key key;
     295             :         struct btrfs_ioctl_defrag_range_args range;
     296             :         int num_defrag;
     297             :         int index;
     298             :         int ret;
     299             : 
     300             :         /* get the inode */
     301           0 :         key.objectid = defrag->root;
     302             :         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
     303           0 :         key.offset = (u64)-1;
     304             : 
     305           0 :         index = srcu_read_lock(&fs_info->subvol_srcu);
     306             : 
     307             :         inode_root = btrfs_read_fs_root_no_name(fs_info, &key);
     308           0 :         if (IS_ERR(inode_root)) {
     309           0 :                 ret = PTR_ERR(inode_root);
     310           0 :                 goto cleanup;
     311             :         }
     312             : 
     313           0 :         key.objectid = defrag->ino;
     314             :         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
     315           0 :         key.offset = 0;
     316           0 :         inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL);
     317           0 :         if (IS_ERR(inode)) {
     318           0 :                 ret = PTR_ERR(inode);
     319           0 :                 goto cleanup;
     320             :         }
     321             :         srcu_read_unlock(&fs_info->subvol_srcu, index);
     322             : 
     323             :         /* do a chunk of defrag */
     324             :         clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
     325           0 :         memset(&range, 0, sizeof(range));
     326           0 :         range.len = (u64)-1;
     327           0 :         range.start = defrag->last_offset;
     328             : 
     329           0 :         sb_start_write(fs_info->sb);
     330           0 :         num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid,
     331             :                                        BTRFS_DEFRAG_BATCH);
     332           0 :         sb_end_write(fs_info->sb);
     333             :         /*
     334             :          * if we filled the whole defrag batch, there
     335             :          * must be more work to do.  Queue this defrag
     336             :          * again
     337             :          */
     338           0 :         if (num_defrag == BTRFS_DEFRAG_BATCH) {
     339           0 :                 defrag->last_offset = range.start;
     340           0 :                 btrfs_requeue_inode_defrag(inode, defrag);
     341           0 :         } else if (defrag->last_offset && !defrag->cycled) {
     342             :                 /*
     343             :                  * we didn't fill our defrag batch, but
     344             :                  * we didn't start at zero.  Make sure we loop
     345             :                  * around to the start of the file.
     346             :                  */
     347           0 :                 defrag->last_offset = 0;
     348           0 :                 defrag->cycled = 1;
     349           0 :                 btrfs_requeue_inode_defrag(inode, defrag);
     350             :         } else {
     351           0 :                 kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
     352             :         }
     353             : 
     354           0 :         iput(inode);
     355           0 :         return 0;
     356             : cleanup:
     357             :         srcu_read_unlock(&fs_info->subvol_srcu, index);
     358           0 :         kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
     359           0 :         return ret;
     360             : }
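
The tail of __btrfs_run_defrag_inode() implements a two-pass sweep per
file: each run defragments at most BTRFS_DEFRAG_BATCH extents starting at
last_offset; a full batch means more work likely remains, so the record is
requeued from where it stopped, while a short batch that did not start at
offset zero wraps back to the beginning exactly once (cycled) before the
record is finally freed.  A compact sketch of just that decision, with
after_batch() standing in for the code that runs once btrfs_defrag_file()
returns (the helper and struct work are illustrative, not kernel APIs):

    #include <stdbool.h>
    #include <stdio.h>

    #define DEFRAG_BATCH 1024

    struct work {
            unsigned long long last_offset; /* where the next run starts */
            bool cycled;                    /* already wrapped to zero once */
    };

    /* Decide what happens after one batch; returns true if requeued. */
    static bool after_batch(struct work *w, int num_defragged,
                            unsigned long long next_offset)
    {
            if (num_defragged == DEFRAG_BATCH) {
                    /* batch filled: more extents likely remain */
                    w->last_offset = next_offset;
                    return true;
            }
            if (w->last_offset && !w->cycled) {
                    /* short batch, but we started mid-file: sweep the head */
                    w->last_offset = 0;
                    w->cycled = true;
                    return true;
            }
            return false;   /* done; the kernel frees the record here */
    }

    int main(void)
    {
            struct work w = { .last_offset = 1 << 20 };

            printf("%d\n", after_batch(&w, DEFRAG_BATCH, 2 << 20)); /* 1 */
            printf("%d\n", after_batch(&w, 10, 0));                 /* 1: wrapped */
            printf("%d\n", after_batch(&w, 10, 0));                 /* 0: done */
            return 0;
    }
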
     361             : 
     362             : /*
     363             :  * run through the list of inodes in the FS that need
     364             :  * defragging
     365             :  */
     366         295 : int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
     367             : {
     368             :         struct inode_defrag *defrag;
     369             :         u64 first_ino = 0;
     370             :         u64 root_objectid = 0;
     371             : 
     372         295 :         atomic_inc(&fs_info->defrag_running);
     373             :         while (1) {
     374             :                 /* Pause the auto defragger. */
     375         295 :                 if (test_bit(BTRFS_FS_STATE_REMOUNTING,
     376             :                              &fs_info->fs_state))
     377             :                         break;
     378             : 
     379         590 :                 if (!__need_auto_defrag(fs_info->tree_root))
     380             :                         break;
     381             : 
     382             :                 /* find an inode to defrag */
     383           0 :                 defrag = btrfs_pick_defrag_inode(fs_info, root_objectid,
     384             :                                                  first_ino);
     385           0 :                 if (!defrag) {
     386           0 :                         if (root_objectid || first_ino) {
     387             :                                 root_objectid = 0;
     388             :                                 first_ino = 0;
     389           0 :                                 continue;
     390             :                         } else {
     391             :                                 break;
     392             :                         }
     393             :                 }
     394             : 
     395           0 :                 first_ino = defrag->ino + 1;
     396           0 :                 root_objectid = defrag->root;
     397             : 
     398           0 :                 __btrfs_run_defrag_inode(fs_info, defrag);
     399             :         }
     400             :         atomic_dec(&fs_info->defrag_running);
     401             : 
     402             :         /*
     403             :          * during unmount, we use the transaction_wait queue to
     404             :          * wait for the defragger to stop
     405             :          */
     406         295 :         wake_up(&fs_info->transaction_wait);
     407         295 :         return 0;
     408             : }
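
btrfs_run_defrag_inodes() sweeps the queue with a (root_objectid, first_ino)
cursor: btrfs_pick_defrag_inode() removes and returns the first queued
record at or after the cursor, the cursor then advances to (root, ino + 1),
and when the pick comes back empty with a non-zero cursor the loop restarts
from (0, 0) to catch records that sort before it.  A self-contained
userspace model of that sweep, with pick() playing the role of the rbtree
ceiling search (all names here are illustrative, not kernel APIs):

    #include <stdint.h>
    #include <stdio.h>

    struct rec { uint64_t root, ino; int used; };

    /* remove and return the first live record >= (root, ino), or NULL */
    static struct rec *pick(struct rec *q, int n, uint64_t root, uint64_t ino)
    {
            struct rec *best = NULL;

            for (int i = 0; i < n; i++) {
                    if (!q[i].used)
                            continue;
                    if (q[i].root < root ||
                        (q[i].root == root && q[i].ino < ino))
                            continue;       /* sorts before the cursor */
                    if (!best || q[i].root < best->root ||
                        (q[i].root == best->root && q[i].ino < best->ino))
                            best = &q[i];
            }
            if (best)
                    best->used = 0;         /* like rb_erase() in the pick */
            return best;
    }

    int main(void)
    {
            struct rec queue[] = { {5, 258, 1}, {5, 300, 1}, {7, 100, 1} };
            uint64_t root = 0, ino = 0;

            for (;;) {
                    struct rec *r = pick(queue, 3, root, ino);

                    if (!r) {
                            if (root || ino) {
                                    /* retry from the start, as the kernel does */
                                    root = ino = 0;
                                    continue;
                            }
                            break;
                    }
                    printf("defrag root=%llu ino=%llu\n",
                           (unsigned long long)r->root,
                           (unsigned long long)r->ino);
                    ino = r->ino + 1;       /* resume just past this record */
                    root = r->root;
            }
            return 0;
    }
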
     409             : 
     410             : /* simple helper to fault in pages and copy.  This should go away
     411             :  * and be replaced with calls into generic code.
     412             :  */
     413      118028 : static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
     414             :                                          size_t write_bytes,
     415             :                                          struct page **prepared_pages,
     416             :                                          struct iov_iter *i)
     417             : {
     418             :         size_t copied = 0;
     419             :         size_t total_copied = 0;
     420             :         int pg = 0;
     421      118028 :         int offset = pos & (PAGE_CACHE_SIZE - 1);
     422             : 
     423     1268380 :         while (write_bytes > 0) {
     424     1150351 :                 size_t count = min_t(size_t,
     425             :                                      PAGE_CACHE_SIZE - offset, write_bytes);
     426     1150351 :                 struct page *page = prepared_pages[pg];
     427             :                 /*
     428             :                  * Copy data from userspace to the current page
     429             :                  */
     430     1150351 :                 copied = iov_iter_copy_from_user_atomic(page, i, offset, count);
     431             : 
     432             :                 /* Flush processor's dcache for this page */
     433             :                 flush_dcache_page(page);
     434             : 
     435             :                 /*
     436             :                  * if we get a partial write, we can end up with
     437             :                  * partially up-to-date pages.  These add
     438             :                  * a lot of complexity, so make sure they don't
     439             :                  * happen by forcing this copy to be retried.
     440             :                  *
     441             :                  * The rest of the btrfs_file_write code will fall
     442             :                  * back to page-at-a-time copies after we return 0.
     443             :                  */
     444     1150357 :                 if (!PageUptodate(page) && copied < count)
     445             :                         copied = 0;
     446             : 
     447     1150357 :                 iov_iter_advance(i, copied);
     448     1150351 :                 write_bytes -= copied;
     449     1150351 :                 total_copied += copied;
     450             : 
     451             :                 /* Return to btrfs_file_write_iter to fault page */
     452     1150351 :                 if (unlikely(copied == 0))
     453             :                         break;
     454             : 
     455     1150352 :                 if (unlikely(copied < PAGE_CACHE_SIZE - offset)) {
     456       42920 :                         offset += copied;
     457             :                 } else {
     458     1107432 :                         pg++;
     459             :                         offset = 0;
     460             :                 }
     461             :         }
     462      118028 :         return total_copied;
     463             : }
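
btrfs_copy_from_user() carves the write into page-sized pieces: the first
piece starts at pos & (PAGE_CACHE_SIZE - 1) inside the first prepared page,
later pieces start at offset 0, and the page index advances only when a
copy reaches the end of its page.  A worked example of just that
segmentation arithmetic, assuming 4 KiB pages and that every copy succeeds
in full (the real loop also handles short copies from
iov_iter_copy_from_user_atomic()):

    #include <stddef.h>
    #include <stdio.h>

    #define PAGE_CACHE_SIZE 4096UL          /* assuming 4 KiB pages */
    #define min(a, b) ((a) < (b) ? (a) : (b))

    int main(void)
    {
            unsigned long long pos = 5000;  /* file position of the write */
            size_t write_bytes = 10000;     /* bytes to copy */
            size_t offset = pos & (PAGE_CACHE_SIZE - 1);
            int pg = 0;

            while (write_bytes > 0) {
                    size_t count = min(PAGE_CACHE_SIZE - offset, write_bytes);

                    printf("page %d: copy %zu bytes at in-page offset %zu\n",
                           pg, count, offset);
                    write_bytes -= count;

                    /* advance exactly as the kernel loop does on a full copy */
                    if (count < PAGE_CACHE_SIZE - offset) {
                            offset += count;
                    } else {
                            pg++;
                            offset = 0;
                    }
            }
            /* prints: 3192 bytes at 904, then 4096 at 0, then 2712 at 0 */
            return 0;
    }
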
     464             : 
     465             : /*
     466             :  * unlocks pages after btrfs_file_write is done with them
     467             :  */
     468      118029 : static void btrfs_drop_pages(struct page **pages, size_t num_pages)
     469             : {
     470             :         size_t i;
     471     1268397 :         for (i = 0; i < num_pages; i++) {
     472             :                 /* PageChecked is some magic around finding pages that
     473             :                  * have been modified without going through btrfs_set_page_dirty;
     474             :                  * clear it here. There should be no need to mark the pages
     475             :                  * accessed, as prepare_pages should have marked them accessed
     476             :                  * via find_or_create_page()
     477             :                  */
     478     1150369 :                 ClearPageChecked(pages[i]);
     479     1150367 :                 unlock_page(pages[i]);
     480     1150366 :                 page_cache_release(pages[i]);
     481             :         }
     482      118028 : }
     483             : 
     484             : /*
     485             :  * after copy_from_user, pages need to be dirtied and we need to make
     486             :  * sure holes are created between the current EOF and the start of
     487             :  * any next extents (if required).
     488             :  *
     489             :  * this also makes the decision about creating an inline extent vs
     490             :  * doing real data extents, marking pages dirty and delalloc as required.
     491             :  */
     492      244170 : int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
     493             :                              struct page **pages, size_t num_pages,
     494             :                              loff_t pos, size_t write_bytes,
     495             :                              struct extent_state **cached)
     496             : {
     497             :         int err = 0;
     498             :         int i;
     499             :         u64 num_bytes;
     500             :         u64 start_pos;
     501             :         u64 end_of_last_block;
     502      122085 :         u64 end_pos = pos + write_bytes;
     503             :         loff_t isize = i_size_read(inode);
     504             : 
     505      122085 :         start_pos = pos & ~((u64)root->sectorsize - 1);
     506      122085 :         num_bytes = ALIGN(write_bytes + pos - start_pos, root->sectorsize);
     507             : 
     508      122085 :         end_of_last_block = start_pos + num_bytes - 1;
     509      122085 :         err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
     510             :                                         cached);
     511      122085 :         if (err)
     512             :                 return err;
     513             : 
     514     1339903 :         for (i = 0; i < num_pages; i++) {
     515     1339903 :                 struct page *p = pages[i];
     516             :                 SetPageUptodate(p);
     517             :                 ClearPageChecked(p);
     518     1339905 :                 set_page_dirty(p);
     519             :         }
     520             : 
     521             :         /*
     522             :          * we've only changed i_size in RAM, and we haven't updated
     523             :          * the disk i_size.  There is no need to log the inode
     524             :          * at this time.
     525             :          */
     526      122085 :         if (end_pos > isize)
     527      114746 :                 i_size_write(inode, end_pos);
     528             :         return 0;
     529             : }
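
The delalloc range set up in btrfs_dirty_pages() is the write range rounded
out to sector boundaries: start_pos is pos rounded down to a multiple of
sectorsize, and num_bytes is the distance from that rounded-down start to
the end of the write, rounded up.  A worked example of the arithmetic,
assuming a 4 KiB sectorsize and illustrative pos/write_bytes values:

    #include <stdio.h>

    #define ALIGN(x, a) (((x) + (a) - 1) & ~((unsigned long long)(a) - 1))

    int main(void)
    {
            unsigned long long sectorsize = 4096;   /* assumed */
            unsigned long long pos = 6000;          /* write starts here */
            unsigned long long write_bytes = 1000;  /* and covers this much */

            unsigned long long start_pos = pos & ~(sectorsize - 1);
            unsigned long long num_bytes =
                    ALIGN(write_bytes + pos - start_pos, sectorsize);
            unsigned long long end_of_last_block = start_pos + num_bytes - 1;

            /* 4096, 4096, 8191: one sector covers the whole 1000-byte write */
            printf("start_pos=%llu num_bytes=%llu end_of_last_block=%llu\n",
                   start_pos, num_bytes, end_of_last_block);
            return 0;
    }
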
     530             : 
     531             : /*
     532             :  * this drops all the extents in the cache that intersect the range
     533             :  * [start, end].  Existing extents are split as required.
     534             :  */
     535      116261 : void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
     536             :                              int skip_pinned)
     537             : {
     538             :         struct extent_map *em;
     539             :         struct extent_map *split = NULL;
     540             :         struct extent_map *split2 = NULL;
     541      116261 :         struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
     542      116261 :         u64 len = end - start + 1;
     543             :         u64 gen;
     544             :         int ret;
     545             :         int testend = 1;
     546             :         unsigned long flags;
     547             :         int compressed = 0;
     548             :         bool modified;
     549             : 
     550      116261 :         WARN_ON(end < start);
     551      116266 :         if (end == (u64)-1) {
     552             :                 len = (u64)-1;
     553             :                 testend = 0;
     554             :         }
     555             :         while (1) {
     556             :                 int no_splits = 0;
     557             : 
     558             :                 modified = false;
     559      178927 :                 if (!split)
     560      120952 :                         split = alloc_extent_map();
     561      178913 :                 if (!split2)
     562      145724 :                         split2 = alloc_extent_map();
     563      178911 :                 if (!split || !split2)
     564             :                         no_splits = 1;
     565             : 
     566      178911 :                 write_lock(&em_tree->lock);
     567      178935 :                 em = lookup_extent_mapping(em_tree, start, len);
     568      178930 :                 if (!em) {
     569             :                         write_unlock(&em_tree->lock);
     570             :                         break;
     571             :                 }
     572       62660 :                 flags = em->flags;
     573       62660 :                 gen = em->generation;
     574       63734 :                 if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
     575           0 :                         if (testend && em->start + em->len >= start + len) {
     576           0 :                                 free_extent_map(em);
     577             :                                 write_unlock(&em_tree->lock);
     578             :                                 break;
     579             :                         }
     580           0 :                         start = em->start + em->len;
     581           0 :                         if (testend)
     582           0 :                                 len = start + len - (em->start + em->len);
     583           0 :                         free_extent_map(em);
     584             :                         write_unlock(&em_tree->lock);
     585           0 :                         continue;
     586             :                 }
     587             :                 compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
     588             :                 clear_bit(EXTENT_FLAG_PINNED, &em->flags);
     589             :                 clear_bit(EXTENT_FLAG_LOGGING, &flags);
     590      125328 :                 modified = !list_empty(&em->list);
     591       62664 :                 if (no_splits)
     592             :                         goto next;
     593             : 
     594       62664 :                 if (em->start < start) {
     595       29457 :                         split->start = em->start;
     596       29457 :                         split->len = start - em->start;
     597             : 
     598       29457 :                         if (em->block_start < EXTENT_MAP_LAST_BYTE) {
     599        2294 :                                 split->orig_start = em->orig_start;
     600        2294 :                                 split->block_start = em->block_start;
     601             : 
     602        2294 :                                 if (compressed)
     603          54 :                                         split->block_len = em->block_len;
     604             :                                 else
     605        2240 :                                         split->block_len = split->len;
     606        2294 :                                 split->orig_block_len = max(split->block_len,
     607             :                                                 em->orig_block_len);
     608        2294 :                                 split->ram_bytes = em->ram_bytes;
     609             :                         } else {
     610       27163 :                                 split->orig_start = split->start;
     611       27163 :                                 split->block_len = 0;
     612       27163 :                                 split->block_start = em->block_start;
     613       27163 :                                 split->orig_block_len = 0;
     614       27163 :                                 split->ram_bytes = split->len;
     615             :                         }
     616             : 
     617       29457 :                         split->generation = gen;
     618       29457 :                         split->bdev = em->bdev;
     619       29457 :                         split->flags = flags;
     620       29457 :                         split->compress_type = em->compress_type;
     621       29457 :                         replace_extent_mapping(em_tree, em, split, modified);
     622       29457 :                         free_extent_map(split);
     623             :                         split = split2;
     624             :                         split2 = NULL;
     625             :                 }
     626       62663 :                 if (testend && em->start + em->len > start + len) {
     627        4689 :                         u64 diff = start + len - em->start;
     628             : 
     629        4689 :                         split->start = start + len;
     630        4689 :                         split->len = em->start + em->len - (start + len);
     631        4689 :                         split->bdev = em->bdev;
     632        4689 :                         split->flags = flags;
     633        4689 :                         split->compress_type = em->compress_type;
     634        4689 :                         split->generation = gen;
     635             : 
     636        4689 :                         if (em->block_start < EXTENT_MAP_LAST_BYTE) {
     637        2777 :                                 split->orig_block_len = max(em->block_len,
     638             :                                                     em->orig_block_len);
     639             : 
     640        2777 :                                 split->ram_bytes = em->ram_bytes;
     641        2777 :                                 if (compressed) {
     642          11 :                                         split->block_len = em->block_len;
     643          11 :                                         split->block_start = em->block_start;
     644          11 :                                         split->orig_start = em->orig_start;
     645             :                                 } else {
     646        2766 :                                         split->block_len = split->len;
     647        5532 :                                         split->block_start = em->block_start
     648        2766 :                                                 + diff;
     649        2766 :                                         split->orig_start = em->orig_start;
     650             :                                 }
     651             :                         } else {
     652        1912 :                                 split->ram_bytes = split->len;
     653        1912 :                                 split->orig_start = split->start;
     654        1912 :                                 split->block_len = 0;
     655        1912 :                                 split->block_start = em->block_start;
     656        1912 :                                 split->orig_block_len = 0;
     657             :                         }
     658             : 
     659        4689 :                         if (extent_map_in_tree(em)) {
     660        3246 :                                 replace_extent_mapping(em_tree, em, split,
     661             :                                                        modified);
     662             :                         } else {
     663        1443 :                                 ret = add_extent_mapping(em_tree, split,
     664             :                                                          modified);
     665             :                                 ASSERT(ret == 0); /* Logic error */
     666             :                         }
     667        4689 :                         free_extent_map(split);
     668             :                         split = NULL;
     669             :                 }
     670             : next:
     671       62663 :                 if (extent_map_in_tree(em))
     672       29960 :                         remove_extent_mapping(em_tree, em);
     673             :                 write_unlock(&em_tree->lock);
     674             : 
     675             :                 /* once for us */
     676       62664 :                 free_extent_map(em);
     677             :                 /* once for the tree*/
     678       62663 :                 free_extent_map(em);
     679             :         }
     680      116269 :         if (split)
     681      116270 :                 free_extent_map(split);
     682      116270 :         if (split2)
     683      116270 :                 free_extent_map(split2);
     684      116271 : }
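
When a cached mapping straddles the dropped range, btrfs_drop_extent_cache()
replaces it with up to two pieces: a front piece covering [em->start, start)
that keeps the original disk start, and a back piece covering
[start + len, em->start + em->len) whose block_start is shifted by its
distance into the original extent (for regular, uncompressed extents).
A worked example of the split arithmetic with illustrative numbers:

    #include <stdio.h>

    int main(void)
    {
            /* a cached, uncompressed, regular extent: file range + disk start */
            unsigned long long em_start = 0, em_len = 16384;
            unsigned long long em_block_start = 1048576;

            /* range being dropped: [start, start + len) */
            unsigned long long start = 4096, len = 4096;

            /* front piece keeps the original disk start */
            unsigned long long front_len = start - em_start;        /* 4096 */
            unsigned long long front_block = em_block_start;        /* 1048576 */

            /* back piece starts 'diff' bytes into the original extent */
            unsigned long long diff = start + len - em_start;       /* 8192 */
            unsigned long long back_start = start + len;            /* 8192 */
            unsigned long long back_len =
                    em_start + em_len - (start + len);              /* 8192 */
            unsigned long long back_block = em_block_start + diff;  /* 1056768 */

            printf("front: file [%llu, %llu) disk %llu\n",
                   em_start, em_start + front_len, front_block);
            printf("back:  file [%llu, %llu) disk %llu\n",
                   back_start, back_start + back_len, back_block);
            return 0;
    }
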
     685             : 
     686             : /*
     687             :  * this is very complex, but the basic idea is to drop all extents
     688             :  * in the range start - end.  hint_block is filled in with a block number
     689             :  * that would be a good hint to the block allocator for this file.
     690             :  *
     691             :  * If an extent intersects the range but is not entirely inside the range
     692             :  * it is either truncated or split.  Anything entirely inside the range
     693             :  * is deleted from the tree.
     694             :  */
     695       64833 : int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
     696             :                          struct btrfs_root *root, struct inode *inode,
     697             :                          struct btrfs_path *path, u64 start, u64 end,
     698             :                          u64 *drop_end, int drop_cache,
     699             :                          int replace_extent,
     700             :                          u32 extent_item_size,
     701             :                          int *key_inserted)
     702             : {
     703      122552 :         struct extent_buffer *leaf;
     704             :         struct btrfs_file_extent_item *fi;
     705             :         struct btrfs_key key;
     706             :         struct btrfs_key new_key;
     707             :         u64 ino = btrfs_ino(inode);
     708             :         u64 search_start = start;
     709             :         u64 disk_bytenr = 0;
     710             :         u64 num_bytes = 0;
     711             :         u64 extent_offset = 0;
     712             :         u64 extent_end = 0;
     713             :         int del_nr = 0;
     714             :         int del_slot = 0;
     715             :         int extent_type;
     716             :         int recow;
     717             :         int ret;
     718             :         int modify_tree = -1;
     719             :         int update_refs;
     720             :         int found = 0;
     721             :         int leafs_visited = 0;
     722             : 
     723       64833 :         if (drop_cache)
     724        9063 :                 btrfs_drop_extent_cache(inode, start, end - 1, 0);
     725             : 
     726       64837 :         if (start >= BTRFS_I(inode)->disk_i_size && !replace_extent)
     727             :                 modify_tree = 0;
     728             : 
     729       71215 :         update_refs = (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
     730        6378 :                        root == root->fs_info->tree_root);
     731             :         while (1) {
     732             :                 recow = 0;
     733       65386 :                 ret = btrfs_lookup_file_extent(trans, root, path, ino,
     734             :                                                search_start, modify_tree);
     735       65394 :                 if (ret < 0)
     736             :                         break;
     737       65395 :                 if (ret > 0 && path->slots[0] > 0 && search_start == start) {
     738       55498 :                         leaf = path->nodes[0];
     739       55498 :                         btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);
     740      110997 :                         if (key.objectid == ino &&
     741       55499 :                             key.type == BTRFS_EXTENT_DATA_KEY)
     742       38625 :                                 path->slots[0]--;
     743             :                 }
     744             :                 ret = 0;
     745       65395 :                 leafs_visited++;
     746             : next_slot:
     747       76119 :                 leaf = path->nodes[0];
     748      152238 :                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
     749        5778 :                         BUG_ON(del_nr > 0);
     750        5778 :                         ret = btrfs_next_leaf(root, path);
     751        5781 :                         if (ret < 0)
     752             :                                 break;
     753        5781 :                         if (ret > 0) {
     754             :                                 ret = 0;
     755             :                                 break;
     756             :                         }
     757         271 :                         leafs_visited++;
     758         271 :                         leaf = path->nodes[0];
     759             :                         recow = 1;
     760             :                 }
     761             : 
     762       70612 :                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
     763      120858 :                 if (key.objectid > ino ||
     764      100496 :                     key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end)
     765             :                         break;
     766             : 
     767      100362 :                 fi = btrfs_item_ptr(leaf, path->slots[0],
     768             :                                     struct btrfs_file_extent_item);
     769       50179 :                 extent_type = btrfs_file_extent_type(leaf, fi);
     770             : 
     771       50179 :                 if (extent_type == BTRFS_FILE_EXTENT_REG ||
     772             :                     extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
     773             :                         disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
     774             :                         num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
     775             :                         extent_offset = btrfs_file_extent_offset(leaf, fi);
     776      100267 :                         extent_end = key.offset +
     777             :                                 btrfs_file_extent_num_bytes(leaf, fi);
     778          44 :                 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
     779          88 :                         extent_end = key.offset +
     780          44 :                                 btrfs_file_extent_inline_len(leaf,
     781             :                                                      path->slots[0], fi);
     782             :                 } else {
     783           0 :                         WARN_ON(1);
     784             :                         extent_end = search_start;
     785             :                 }
     786             : 
     787             :                 /*
     788             :                  * Don't skip extent items representing 0 byte lengths. They
     789             :                  * used to be created (bug) if, while punching holes, we hit an
     790             :                  * -ENOSPC condition. So if we find one here, just ensure we
     791             :                  * delete it, otherwise we would insert a new file extent item
     792             :                  * with the same key (offset) as that 0 byte length file
     793             :                  * extent item in the call to setup_items_for_insert() later
     794             :                  * in this function.
     795             :                  */
     796       50178 :                 if (extent_end == key.offset && extent_end >= search_start)
     797             :                         goto delete_extent_item;
     798             : 
     799       50178 :                 if (extent_end <= search_start) {
     800        8837 :                         path->slots[0]++;
     801        8837 :                         goto next_slot;
     802             :                 }
     803             : 
     804             :                 found = 1;
     805       41341 :                 search_start = max(key.offset, start);
     806       41341 :                 if (recow || !modify_tree) {
     807             :                         modify_tree = -1;
     808         482 :                         btrfs_release_path(path);
     809         482 :                         continue;
     810             :                 }
     811             : 
     812             :                 /*
     813             :                  *     | - range to drop - |
     814             :                  *  | -------- extent -------- |
     815             :                  */
     816       40859 :                 if (start > key.offset && end < extent_end) {
     817        1239 :                         BUG_ON(del_nr > 0);
     818        1239 :                         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
     819             :                                 ret = -EOPNOTSUPP;
     820             :                                 break;
     821             :                         }
     822             : 
     823        1239 :                         memcpy(&new_key, &key, sizeof(new_key));
     824        1239 :                         new_key.offset = start;
     825        1239 :                         ret = btrfs_duplicate_item(trans, root, path,
     826             :                                                    &new_key);
     827        1239 :                         if (ret == -EAGAIN) {
     828           2 :                                 btrfs_release_path(path);
     829           2 :                                 continue;
     830             :                         }
     831        1237 :                         if (ret < 0)
     832             :                                 break;
     833             : 
     834        1237 :                         leaf = path->nodes[0];
     835        2474 :                         fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
     836             :                                             struct btrfs_file_extent_item);
     837        1237 :                         btrfs_set_file_extent_num_bytes(leaf, fi,
     838        1237 :                                                         start - key.offset);
     839             : 
     840        2474 :                         fi = btrfs_item_ptr(leaf, path->slots[0],
     841             :                                             struct btrfs_file_extent_item);
     842             : 
     843        1237 :                         extent_offset += start - key.offset;
     844             :                         btrfs_set_file_extent_offset(leaf, fi, extent_offset);
     845        1237 :                         btrfs_set_file_extent_num_bytes(leaf, fi,
     846             :                                                         extent_end - start);
     847        1237 :                         btrfs_mark_buffer_dirty(leaf);
     848             : 
     849        1237 :                         if (update_refs && disk_bytenr > 0) {
     850         675 :                                 ret = btrfs_inc_extent_ref(trans, root,
     851             :                                                 disk_bytenr, num_bytes, 0,
     852             :                                                 root->root_key.objectid,
     853             :                                                 new_key.objectid,
     854             :                                                 start - extent_offset, 1);
     855         675 :                                 BUG_ON(ret); /* -ENOMEM */
     856             :                         }
     857        1237 :                         key.offset = start;
     858             :                 }
     859             :                 /*
     860             :                  *  | ---- range to drop ----- |
     861             :                  *      | -------- extent -------- |
     862             :                  */
     863       40857 :                 if (start <= key.offset && end < extent_end) {
     864        3244 :                         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
     865             :                                 ret = -EOPNOTSUPP;
     866             :                                 break;
     867             :                         }
     868             : 
     869        3244 :                         memcpy(&new_key, &key, sizeof(new_key));
     870        3244 :                         new_key.offset = end;
     871        3244 :                         btrfs_set_item_key_safe(root, path, &new_key);
     872             : 
     873        3244 :                         extent_offset += end - key.offset;
     874             :                         btrfs_set_file_extent_offset(leaf, fi, extent_offset);
     875        3244 :                         btrfs_set_file_extent_num_bytes(leaf, fi,
     876             :                                                         extent_end - end);
     877        3244 :                         btrfs_mark_buffer_dirty(leaf);
     878        3244 :                         if (update_refs && disk_bytenr > 0)
     879        1377 :                                 inode_sub_bytes(inode, end - key.offset);
     880             :                         break;
     881             :                 }
     882             : 
     883             :                 search_start = extent_end;
     884             :                 /*
     885             :                  *       | ---- range to drop ----- |
     886             :                  *  | -------- extent -------- |
     887             :                  */
     888       37613 :                 if (start > key.offset && end >= extent_end) {
     889       28524 :                         BUG_ON(del_nr > 0);
     890       28524 :                         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
     891             :                                 ret = -EOPNOTSUPP;
     892             :                                 break;
     893             :                         }
     894             : 
     895       28524 :                         btrfs_set_file_extent_num_bytes(leaf, fi,
     896             :                                                         start - key.offset);
     897       28524 :                         btrfs_mark_buffer_dirty(leaf);
     898       28524 :                         if (update_refs && disk_bytenr > 0)
     899        1106 :                                 inode_sub_bytes(inode, extent_end - start);
     900       28524 :                         if (end == extent_end)
     901             :                                 break;
     902             : 
     903         232 :                         path->slots[0]++;
     904         232 :                         goto next_slot;
     905             :                 }
     906             : 
     907             :                 /*
     908             :                  *  | ---- range to drop ----- |
     909             :                  *    | ------ extent ------ |
     910             :                  */
     911        9089 :                 if (start <= key.offset && end >= extent_end) {
     912             : delete_extent_item:
     913        9089 :                         if (del_nr == 0) {
     914        7548 :                                 del_slot = path->slots[0];
     915             :                                 del_nr = 1;
     916             :                         } else {
     917        1541 :                                 BUG_ON(del_slot + del_nr != path->slots[0]);
     918        1541 :                                 del_nr++;
     919             :                         }
     920             : 
     921       18178 :                         if (update_refs &&
     922        9089 :                             extent_type == BTRFS_FILE_EXTENT_INLINE) {
     923          44 :                                 inode_sub_bytes(inode,
     924          44 :                                                 extent_end - key.offset);
     925          44 :                                 extent_end = ALIGN(extent_end,
     926             :                                                    root->sectorsize);
     927        9045 :                         } else if (update_refs && disk_bytenr > 0) {
     928        7437 :                                 ret = btrfs_free_extent(trans, root,
     929             :                                                 disk_bytenr, num_bytes, 0,
     930             :                                                 root->root_key.objectid,
     931        7437 :                                                 key.objectid, key.offset -
     932             :                                                 extent_offset, 0);
     933        7438 :                                 BUG_ON(ret); /* -ENOMEM */
     934        7438 :                                 inode_sub_bytes(inode,
     935        7438 :                                                 extent_end - key.offset);
     936             :                         }
     937             : 
     938        9090 :                         if (end == extent_end)
     939             :                                 break;
     940             : 
     941        3440 :                         if (path->slots[0] + 1 < btrfs_header_nritems(leaf)) {
     942        1655 :                                 path->slots[0]++;
     943        1655 :                                 goto next_slot;
     944             :                         }
     945             : 
     946          65 :                         ret = btrfs_del_items(trans, root, path, del_slot,
     947             :                                               del_nr);
     948          65 :                         if (ret) {
     949           0 :                                 btrfs_abort_transaction(trans, root, ret);
     950           0 :                                 break;
     951             :                         }
     952             : 
     953             :                         del_nr = 0;
     954             :                         del_slot = 0;
     955             : 
     956          65 :                         btrfs_release_path(path);
     957          65 :                         continue;
     958             :                 }
     959             : 
     960           0 :                 BUG_ON(1);
     961             :         }
     962             : 
     963       64845 :         if (!ret && del_nr > 0) {
     964             :                 /*
     965             :                  * Set path->slots[0] to first slot, so that after the delete
      966             :                  * if items are moved off from our leaf to its immediate left or
      967             :                  * right neighbor leaves, we end up with a correct and adjusted
     968             :                  * path->slots[0] for our insertion (if replace_extent != 0).
     969             :                  */
     970        7484 :                 path->slots[0] = del_slot;
     971        7484 :                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
     972        7484 :                 if (ret)
     973           0 :                         btrfs_abort_transaction(trans, root, ret);
     974             :         }
     975             : 
     976       64845 :         leaf = path->nodes[0];
     977             :         /*
     978             :          * If btrfs_del_items() was called, it might have deleted a leaf, in
     979             :          * which case it unlocked our path, so check path->locks[0] matches a
     980             :          * write lock.
     981             :          */
     982      124216 :         if (!ret && replace_extent && leafs_visited == 1 &&
     983       59371 :             (path->locks[0] == BTRFS_WRITE_LOCK_BLOCKING ||
     984       55149 :              path->locks[0] == BTRFS_WRITE_LOCK) &&
     985       55149 :             btrfs_leaf_free_space(root, leaf) >=
     986       55149 :             sizeof(struct btrfs_item) + extent_item_size) {
     987             : 
     988       51843 :                 key.objectid = ino;
     989       51843 :                 key.type = BTRFS_EXTENT_DATA_KEY;
     990       51843 :                 key.offset = start;
     991       96556 :                 if (!del_nr && path->slots[0] < btrfs_header_nritems(leaf)) {
     992             :                         struct btrfs_key slot_key;
     993             : 
     994       44712 :                         btrfs_item_key_to_cpu(leaf, &slot_key, path->slots[0]);
     995       44712 :                         if (btrfs_comp_cpu_keys(&key, &slot_key) > 0)
     996       27786 :                                 path->slots[0]++;
     997             :                 }
     998       51843 :                 setup_items_for_insert(root, path, &key,
     999             :                                        &extent_item_size,
    1000             :                                        extent_item_size,
    1001             :                                        sizeof(struct btrfs_item) +
    1002             :                                        extent_item_size, 1);
    1003       51841 :                 *key_inserted = 1;
    1004             :         }
    1005             : 
    1006       64843 :         if (!replace_extent || !(*key_inserted))
    1007       13003 :                 btrfs_release_path(path);
    1008       64840 :         if (drop_end)
    1009         112 :                 *drop_end = found ? min(end, extent_end) : end;
    1010       64840 :         return ret;
    1011             : }
    1012             : 
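                     : /*
                     :  * Thin wrapper around __btrfs_drop_extents(): allocate a path, drop
                     :  * (or trim) the file extent items that overlap [start, end), and free
                     :  * the path again; none of the replace_extent machinery is used here.
                     :  */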
    1013        5100 : int btrfs_drop_extents(struct btrfs_trans_handle *trans,
    1014             :                        struct btrfs_root *root, struct inode *inode, u64 start,
    1015             :                        u64 end, int drop_cache)
    1016             : {
    1017             :         struct btrfs_path *path;
    1018             :         int ret;
    1019             : 
    1020        5100 :         path = btrfs_alloc_path();
    1021        5100 :         if (!path)
    1022             :                 return -ENOMEM;
    1023        5100 :         ret = __btrfs_drop_extents(trans, root, inode, path, start, end, NULL,
    1024             :                                    drop_cache, 0, 0, NULL);
    1025        5099 :         btrfs_free_path(path);
    1026        5099 :         return ret;
    1027             : }
    1028             : 
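                     : /*
                     :  * Helper for btrfs_mark_extent_written() below: returns 1 if the file
                     :  * extent item at @slot belongs to @objectid, points at the same disk
                     :  * extent (@bytenr / @orig_offset), and is a plain REG extent without
                     :  * compression or encryption, i.e. a neighbouring item the caller could
                     :  * merge with.  Its [key.offset, extent_end) range is returned through
                     :  * @start and @end; otherwise 0 is returned.
                     :  */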
    1029       23125 : static int extent_mergeable(struct extent_buffer *leaf, int slot,
    1030             :                             u64 objectid, u64 bytenr, u64 orig_offset,
    1031             :                             u64 *start, u64 *end)
    1032             : {
    1033             :         struct btrfs_file_extent_item *fi;
    1034             :         struct btrfs_key key;
    1035             :         u64 extent_end;
    1036             : 
    1037       23125 :         if (slot < 0 || slot >= btrfs_header_nritems(leaf))
    1038             :                 return 0;
    1039             : 
    1040       11524 :         btrfs_item_key_to_cpu(leaf, &key, slot);
    1041       11524 :         if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
    1042             :                 return 0;
    1043             : 
    1044        3522 :         fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
    1045        5199 :         if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG ||
    1046          35 :             btrfs_file_extent_disk_bytenr(leaf, fi) != bytenr ||
    1047          70 :             btrfs_file_extent_offset(leaf, fi) != key.offset - orig_offset ||
    1048          35 :             btrfs_file_extent_compression(leaf, fi) ||
    1049          35 :             btrfs_file_extent_encryption(leaf, fi) ||
    1050             :             btrfs_file_extent_other_encoding(leaf, fi))
    1051             :                 return 0;
    1052             : 
    1053          70 :         extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
    1054          35 :         if ((*start && *start != key.offset) || (*end && *end != extent_end))
    1055             :                 return 0;
    1056             : 
    1057          35 :         *start = key.offset;
    1058          35 :         *end = extent_end;
    1059          35 :         return 1;
    1060             : }
    1061             : 
    1062             : /*
    1063             :  * Mark extent in the range start - end as written.
    1064             :  *
    1065             :  * This changes extent type from 'pre-allocated' to 'regular'. If only
     1066             :  * part of the extent is marked as written, the extent will be split into
    1067             :  * two or three.
    1068             :  */
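                     : /*
                     :  * For example (sketch), writing into the middle of a pre-allocated
                     :  * extent:
                     :  *
                     :  *      | ---------- prealloc extent ---------- |
                     :  *               | --- written range --- |
                     :  *
                     :  * results in a three-way split (prealloc | regular | prealloc), while a
                     :  * written range that touches the start or the end of the extent only
                     :  * needs a two-way split, or can be merged with a neighbouring extent
                     :  * (see extent_mergeable() above).
                     :  */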
    1069        5625 : int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
    1070             :                               struct inode *inode, u64 start, u64 end)
    1071             : {
    1072        5625 :         struct btrfs_root *root = BTRFS_I(inode)->root;
    1073             :         struct extent_buffer *leaf;
    1074             :         struct btrfs_path *path;
    1075             :         struct btrfs_file_extent_item *fi;
    1076             :         struct btrfs_key key;
    1077             :         struct btrfs_key new_key;
    1078             :         u64 bytenr;
    1079             :         u64 num_bytes;
    1080             :         u64 extent_end;
    1081             :         u64 orig_offset;
    1082             :         u64 other_start;
    1083             :         u64 other_end;
    1084             :         u64 split;
    1085             :         int del_nr = 0;
    1086             :         int del_slot = 0;
    1087             :         int recow;
    1088             :         int ret;
    1089             :         u64 ino = btrfs_ino(inode);
    1090             : 
    1091        5625 :         path = btrfs_alloc_path();
    1092        5624 :         if (!path)
    1093             :                 return -ENOMEM;
    1094             : again:
    1095             :         recow = 0;
    1096             :         split = start;
    1097        5625 :         key.objectid = ino;
    1098        5625 :         key.type = BTRFS_EXTENT_DATA_KEY;
    1099        5625 :         key.offset = split;
    1100             : 
    1101        5625 :         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
    1102        5625 :         if (ret < 0)
    1103             :                 goto out;
    1104        5625 :         if (ret > 0 && path->slots[0] > 0)
    1105         441 :                 path->slots[0]--;
    1106             : 
    1107        5625 :         leaf = path->nodes[0];
    1108        5625 :         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
    1109        5625 :         BUG_ON(key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY);
    1110       11250 :         fi = btrfs_item_ptr(leaf, path->slots[0],
    1111             :                             struct btrfs_file_extent_item);
    1112        5625 :         BUG_ON(btrfs_file_extent_type(leaf, fi) !=
    1113             :                BTRFS_FILE_EXTENT_PREALLOC);
    1114       11250 :         extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
    1115        5625 :         BUG_ON(key.offset > start || extent_end < end);
    1116             : 
    1117             :         bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
    1118             :         num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
    1119       11250 :         orig_offset = key.offset - btrfs_file_extent_offset(leaf, fi);
    1120        5625 :         memcpy(&new_key, &key, sizeof(new_key));
    1121             : 
    1122        5625 :         if (start == key.offset && end < extent_end) {
    1123         179 :                 other_start = 0;
    1124         179 :                 other_end = start;
    1125         179 :                 if (extent_mergeable(leaf, path->slots[0] - 1,
    1126             :                                      ino, bytenr, orig_offset,
    1127             :                                      &other_start, &other_end)) {
    1128          29 :                         new_key.offset = end;
    1129          29 :                         btrfs_set_item_key_safe(root, path, &new_key);
    1130          58 :                         fi = btrfs_item_ptr(leaf, path->slots[0],
    1131             :                                             struct btrfs_file_extent_item);
    1132          29 :                         btrfs_set_file_extent_generation(leaf, fi,
    1133             :                                                          trans->transid);
    1134          29 :                         btrfs_set_file_extent_num_bytes(leaf, fi,
    1135             :                                                         extent_end - end);
    1136          29 :                         btrfs_set_file_extent_offset(leaf, fi,
    1137             :                                                      end - orig_offset);
    1138          58 :                         fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
    1139             :                                             struct btrfs_file_extent_item);
    1140          29 :                         btrfs_set_file_extent_generation(leaf, fi,
    1141             :                                                          trans->transid);
    1142          29 :                         btrfs_set_file_extent_num_bytes(leaf, fi,
    1143             :                                                         end - other_start);
    1144          29 :                         btrfs_mark_buffer_dirty(leaf);
    1145          29 :                         goto out;
    1146             :                 }
    1147             :         }
    1148             : 
    1149        5596 :         if (start > key.offset && end == extent_end) {
    1150         204 :                 other_start = end;
    1151         204 :                 other_end = 0;
    1152         204 :                 if (extent_mergeable(leaf, path->slots[0] + 1,
    1153             :                                      ino, bytenr, orig_offset,
    1154             :                                      &other_start, &other_end)) {
    1155           8 :                         fi = btrfs_item_ptr(leaf, path->slots[0],
    1156             :                                             struct btrfs_file_extent_item);
    1157           4 :                         btrfs_set_file_extent_num_bytes(leaf, fi,
    1158           4 :                                                         start - key.offset);
    1159           4 :                         btrfs_set_file_extent_generation(leaf, fi,
    1160             :                                                          trans->transid);
    1161           4 :                         path->slots[0]++;
    1162           4 :                         new_key.offset = start;
    1163           4 :                         btrfs_set_item_key_safe(root, path, &new_key);
    1164             : 
    1165           8 :                         fi = btrfs_item_ptr(leaf, path->slots[0],
    1166             :                                             struct btrfs_file_extent_item);
    1167           4 :                         btrfs_set_file_extent_generation(leaf, fi,
    1168             :                                                          trans->transid);
    1169           4 :                         btrfs_set_file_extent_num_bytes(leaf, fi,
    1170             :                                                         other_end - start);
    1171           4 :                         btrfs_set_file_extent_offset(leaf, fi,
    1172             :                                                      start - orig_offset);
    1173           4 :                         btrfs_mark_buffer_dirty(leaf);
    1174           4 :                         goto out;
    1175             :                 }
    1176             :         }
    1177             : 
    1178        6416 :         while (start > key.offset || end < extent_end) {
    1179         824 :                 if (key.offset == start)
    1180             :                         split = end;
    1181             : 
    1182         824 :                 new_key.offset = split;
    1183         824 :                 ret = btrfs_duplicate_item(trans, root, path, &new_key);
    1184         824 :                 if (ret == -EAGAIN) {
    1185           0 :                         btrfs_release_path(path);
    1186           0 :                         goto again;
    1187             :                 }
    1188         824 :                 if (ret < 0) {
    1189           0 :                         btrfs_abort_transaction(trans, root, ret);
    1190           0 :                         goto out;
    1191             :                 }
    1192             : 
    1193         824 :                 leaf = path->nodes[0];
    1194        1648 :                 fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
    1195             :                                     struct btrfs_file_extent_item);
    1196         824 :                 btrfs_set_file_extent_generation(leaf, fi, trans->transid);
    1197         824 :                 btrfs_set_file_extent_num_bytes(leaf, fi,
    1198         824 :                                                 split - key.offset);
    1199             : 
    1200        1648 :                 fi = btrfs_item_ptr(leaf, path->slots[0],
    1201             :                                     struct btrfs_file_extent_item);
    1202             : 
    1203         824 :                 btrfs_set_file_extent_generation(leaf, fi, trans->transid);
    1204         824 :                 btrfs_set_file_extent_offset(leaf, fi, split - orig_offset);
    1205         824 :                 btrfs_set_file_extent_num_bytes(leaf, fi,
    1206             :                                                 extent_end - split);
    1207         824 :                 btrfs_mark_buffer_dirty(leaf);
    1208             : 
    1209         824 :                 ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
    1210             :                                            root->root_key.objectid,
    1211             :                                            ino, orig_offset, 1);
    1212         824 :                 BUG_ON(ret); /* -ENOMEM */
    1213             : 
    1214         824 :                 if (split == start) {
    1215         437 :                         key.offset = start;
    1216             :                 } else {
    1217         387 :                         BUG_ON(start != key.offset);
    1218         387 :                         path->slots[0]--;
    1219             :                         extent_end = end;
    1220             :                 }
    1221             :                 recow = 1;
    1222             :         }
    1223             : 
    1224        5592 :         other_start = end;
    1225        5592 :         other_end = 0;
    1226        5592 :         if (extent_mergeable(leaf, path->slots[0] + 1,
    1227             :                              ino, bytenr, orig_offset,
    1228             :                              &other_start, &other_end)) {
    1229           1 :                 if (recow) {
    1230           0 :                         btrfs_release_path(path);
    1231           0 :                         goto again;
    1232             :                 }
    1233           1 :                 extent_end = other_end;
    1234           1 :                 del_slot = path->slots[0] + 1;
    1235           1 :                 del_nr++;
    1236           1 :                 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
    1237             :                                         0, root->root_key.objectid,
    1238             :                                         ino, orig_offset, 0);
    1239           1 :                 BUG_ON(ret); /* -ENOMEM */
    1240             :         }
    1241        5592 :         other_start = 0;
    1242        5592 :         other_end = start;
    1243        5592 :         if (extent_mergeable(leaf, path->slots[0] - 1,
    1244             :                              ino, bytenr, orig_offset,
    1245             :                              &other_start, &other_end)) {
    1246           1 :                 if (recow) {
    1247           0 :                         btrfs_release_path(path);
    1248           0 :                         goto again;
    1249             :                 }
    1250           1 :                 key.offset = other_start;
    1251           1 :                 del_slot = path->slots[0];
    1252           1 :                 del_nr++;
    1253           1 :                 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
    1254             :                                         0, root->root_key.objectid,
    1255             :                                         ino, orig_offset, 0);
    1256           1 :                 BUG_ON(ret); /* -ENOMEM */
    1257             :         }
    1258        5592 :         if (del_nr == 0) {
    1259       11180 :                 fi = btrfs_item_ptr(leaf, path->slots[0],
    1260             :                            struct btrfs_file_extent_item);
    1261             :                 btrfs_set_file_extent_type(leaf, fi,
    1262             :                                            BTRFS_FILE_EXTENT_REG);
    1263        5590 :                 btrfs_set_file_extent_generation(leaf, fi, trans->transid);
    1264        5590 :                 btrfs_mark_buffer_dirty(leaf);
    1265             :         } else {
    1266           4 :                 fi = btrfs_item_ptr(leaf, del_slot - 1,
    1267             :                            struct btrfs_file_extent_item);
    1268             :                 btrfs_set_file_extent_type(leaf, fi,
    1269             :                                            BTRFS_FILE_EXTENT_REG);
    1270           2 :                 btrfs_set_file_extent_generation(leaf, fi, trans->transid);
    1271           2 :                 btrfs_set_file_extent_num_bytes(leaf, fi,
    1272           2 :                                                 extent_end - key.offset);
    1273           2 :                 btrfs_mark_buffer_dirty(leaf);
    1274             : 
    1275           2 :                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
    1276           2 :                 if (ret < 0) {
    1277           0 :                         btrfs_abort_transaction(trans, root, ret);
    1278           0 :                         goto out;
    1279             :                 }
    1280             :         }
    1281             : out:
    1282        5625 :         btrfs_free_path(path);
    1283        5625 :         return 0;
    1284             : }
    1285             : 
    1286             : /*
    1287             :  * on error we return an unlocked page and the error value
    1288             :  * on success we return a locked page and 0
    1289             :  */
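                     : /*
                     :  * The read is only issued when @pos is not page aligned (i.e. the page
                     :  * will be partially overwritten) or when @force_uptodate is set; an
                     :  * already up-to-date page, or one that is fully overwritten, skips it.
                     :  */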
    1290      236072 : static int prepare_uptodate_page(struct page *page, u64 pos,
    1291             :                                  bool force_uptodate)
    1292             : {
    1293             :         int ret = 0;
    1294             : 
    1295      314331 :         if (((pos & (PAGE_CACHE_SIZE - 1)) || force_uptodate) &&
    1296             :             !PageUptodate(page)) {
    1297       25436 :                 ret = btrfs_readpage(NULL, page);
    1298       25436 :                 if (ret)
    1299             :                         return ret;
    1300       25436 :                 lock_page(page);
    1301       25436 :                 if (!PageUptodate(page)) {
    1302           0 :                         unlock_page(page);
    1303           0 :                         return -EIO;
    1304             :                 }
    1305             :         }
    1306             :         return 0;
    1307             : }
    1308             : 
    1309             : /*
    1310             :  * this just gets pages into the page cache and locks them down.
    1311             :  */
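                     : /*
                     :  * Only the first and the last page of the range can be partially
                     :  * overwritten, so only those two are pushed through
                     :  * prepare_uptodate_page(); the pages in between are going to be
                     :  * completely overwritten by the copy.
                     :  */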
    1312      118036 : static noinline int prepare_pages(struct inode *inode, struct page **pages,
    1313             :                                   size_t num_pages, loff_t pos,
    1314             :                                   size_t write_bytes, bool force_uptodate)
    1315             : {
    1316             :         int i;
    1317      118036 :         unsigned long index = pos >> PAGE_CACHE_SHIFT;
    1318      118036 :         gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
    1319             :         int err = 0;
    1320             :         int faili;
    1321             : 
    1322     1150365 :         for (i = 0; i < num_pages; i++) {
    1323     2300721 :                 pages[i] = find_or_create_page(inode->i_mapping, index + i,
    1324             :                                                mask | __GFP_WRITE);
    1325     1150360 :                 if (!pages[i]) {
    1326           0 :                         faili = i - 1;
    1327             :                         err = -ENOMEM;
    1328             :                         goto fail;
    1329             :                 }
    1330             : 
    1331     1150360 :                 if (i == 0)
    1332      118036 :                         err = prepare_uptodate_page(pages[i], pos,
    1333             :                                                     force_uptodate);
    1334     1150363 :                 if (i == num_pages - 1)
    1335      118036 :                         err = prepare_uptodate_page(pages[i],
    1336             :                                                     pos + write_bytes, false);
    1337     1150363 :                 if (err) {
    1338           0 :                         page_cache_release(pages[i]);
    1339           0 :                         faili = i - 1;
    1340             :                         goto fail;
    1341             :                 }
    1342     1150363 :                 wait_on_page_writeback(pages[i]);
    1343             :         }
    1344             : 
    1345             :         return 0;
    1346             : fail:
    1347           0 :         while (faili >= 0) {
    1348           0 :                 unlock_page(pages[faili]);
    1349           0 :                 page_cache_release(pages[faili]);
    1350           0 :                 faili--;
    1351             :         }
    1352             :         return err;
    1353             : 
    1354             : }
    1355             : 
    1356             : /*
    1357             :  * This function locks the extent and properly waits for data=ordered extents
     1358             :  * to finish before allowing the pages to be modified if needed.
    1359             :  *
    1360             :  * The return value:
    1361             :  * 1 - the extent is locked
    1362             :  * 0 - the extent is not locked, and everything is OK
     1363             :  * -EAGAIN - the pages need to be re-prepared
     1364             :  * any other value < 0 - something went wrong
    1365             :  */
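                     : /*
                     :  * A caller is expected to handle these return values the way
                     :  * __btrfs_buffered_write() below does: retry page preparation on
                     :  * -EAGAIN, remember [*lockstart, *lockend] and unlock the extent after
                     :  * dirtying the pages when 1 is returned, and carry on without
                     :  * unlocking when 0 is returned.
                     :  */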
    1366             : static noinline int
    1367      118036 : lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages,
    1368             :                                 size_t num_pages, loff_t pos,
    1369             :                                 u64 *lockstart, u64 *lockend,
    1370             :                                 struct extent_state **cached_state)
    1371             : {
    1372             :         u64 start_pos;
    1373             :         u64 last_pos;
    1374             :         int i;
    1375             :         int ret = 0;
    1376             : 
    1377      118036 :         start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1);
    1378      118036 :         last_pos = start_pos + ((u64)num_pages << PAGE_CACHE_SHIFT) - 1;
    1379             : 
    1380      118036 :         if (start_pos < inode->i_size) {
    1381             :                 struct btrfs_ordered_extent *ordered;
    1382       35820 :                 lock_extent_bits(&BTRFS_I(inode)->io_tree,
    1383             :                                  start_pos, last_pos, 0, cached_state);
    1384       35820 :                 ordered = btrfs_lookup_ordered_range(inode, start_pos,
    1385       35820 :                                                      last_pos - start_pos + 1);
    1386       35828 :                 if (ordered &&
    1387          16 :                     ordered->file_offset + ordered->len > start_pos &&
    1388             :                     ordered->file_offset <= last_pos) {
    1389           8 :                         unlock_extent_cached(&BTRFS_I(inode)->io_tree,
    1390             :                                              start_pos, last_pos,
    1391             :                                              cached_state, GFP_NOFS);
    1392          16 :                         for (i = 0; i < num_pages; i++) {
    1393           8 :                                 unlock_page(pages[i]);
    1394           8 :                                 page_cache_release(pages[i]);
    1395             :                         }
    1396           8 :                         btrfs_start_ordered_extent(inode, ordered, 1);
    1397           8 :                         btrfs_put_ordered_extent(ordered);
    1398           8 :                         return -EAGAIN;
    1399             :                 }
    1400       35812 :                 if (ordered)
    1401           0 :                         btrfs_put_ordered_extent(ordered);
    1402             : 
    1403       35812 :                 clear_extent_bit(&BTRFS_I(inode)->io_tree, start_pos,
    1404             :                                   last_pos, EXTENT_DIRTY | EXTENT_DELALLOC |
    1405             :                                   EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
    1406             :                                   0, 0, cached_state, GFP_NOFS);
    1407       35812 :                 *lockstart = start_pos;
    1408       35812 :                 *lockend = last_pos;
    1409             :                 ret = 1;
    1410             :         }
    1411             : 
    1412     1268388 :         for (i = 0; i < num_pages; i++) {
    1413     1150356 :                 if (clear_page_dirty_for_io(pages[i]))
    1414       31901 :                         account_page_redirty(pages[i]);
    1415     1150362 :                 set_page_extent_mapped(pages[i]);
    1416     2300720 :                 WARN_ON(!PageLocked(pages[i]));
    1417             :         }
    1418             : 
    1419             :         return ret;
    1420             : }
    1421             : 
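                     : /*
                     :  * Used by __btrfs_buffered_write() below when the data space
                     :  * reservation fails with -ENOSPC on a NODATACOW/PREALLOC inode: if the
                     :  * range can be written in place (see can_nocow_extent()), *write_bytes
                     :  * is clamped to the part that can go nocow and only metadata needs to
                     :  * be reserved.  Returns > 0 in that case (the btrfs_start_nocow_write()
                     :  * hold is kept for the caller to end), 0 if a nocow write is not
                     :  * possible, and -ENOSPC if btrfs_start_nocow_write() fails.
                     :  */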
    1422           0 : static noinline int check_can_nocow(struct inode *inode, loff_t pos,
    1423             :                                     size_t *write_bytes)
    1424             : {
    1425           0 :         struct btrfs_root *root = BTRFS_I(inode)->root;
    1426             :         struct btrfs_ordered_extent *ordered;
    1427             :         u64 lockstart, lockend;
    1428             :         u64 num_bytes;
    1429             :         int ret;
    1430             : 
    1431           0 :         ret = btrfs_start_nocow_write(root);
    1432           0 :         if (!ret)
    1433             :                 return -ENOSPC;
    1434             : 
    1435           0 :         lockstart = round_down(pos, root->sectorsize);
    1436           0 :         lockend = round_up(pos + *write_bytes, root->sectorsize) - 1;
    1437             : 
    1438             :         while (1) {
    1439           0 :                 lock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
    1440           0 :                 ordered = btrfs_lookup_ordered_range(inode, lockstart,
    1441           0 :                                                      lockend - lockstart + 1);
    1442           0 :                 if (!ordered) {
    1443             :                         break;
    1444             :                 }
    1445           0 :                 unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
    1446           0 :                 btrfs_start_ordered_extent(inode, ordered, 1);
    1447           0 :                 btrfs_put_ordered_extent(ordered);
    1448           0 :         }
    1449             : 
    1450           0 :         num_bytes = lockend - lockstart + 1;
    1451           0 :         ret = can_nocow_extent(inode, lockstart, &num_bytes, NULL, NULL, NULL);
    1452           0 :         if (ret <= 0) {
    1453             :                 ret = 0;
    1454           0 :                 btrfs_end_nocow_write(root);
    1455             :         } else {
    1456           0 :                 *write_bytes = min_t(size_t, *write_bytes ,
    1457             :                                      num_bytes - pos + lockstart);
    1458             :         }
    1459             : 
    1460           0 :         unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
    1461             : 
    1462           0 :         return ret;
    1463             : }
    1464             : 
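                     : /*
                     :  * Buffered write loop: for each chunk of the iov_iter it reserves data
                     :  * and metadata space, prepares and locks the page cache pages, copies
                     :  * the user data in, marks the delalloc range dirty via
                     :  * btrfs_dirty_pages(), and releases any reservation left over from a
                     :  * short copy before balancing dirty pages and moving on.
                     :  */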
    1465      114088 : static noinline ssize_t __btrfs_buffered_write(struct file *file,
    1466      346204 :                                                struct iov_iter *i,
    1467             :                                                loff_t pos)
    1468             : {
    1469             :         struct inode *inode = file_inode(file);
    1470      114088 :         struct btrfs_root *root = BTRFS_I(inode)->root;
    1471             :         struct page **pages = NULL;
    1472      114088 :         struct extent_state *cached_state = NULL;
    1473             :         u64 release_bytes = 0;
    1474             :         u64 lockstart;
    1475             :         u64 lockend;
    1476             :         unsigned long first_index;
    1477             :         size_t num_written = 0;
    1478             :         int nrptrs;
    1479             :         int ret = 0;
    1480             :         bool only_release_metadata = false;
    1481             :         bool force_page_uptodate = false;
    1482             :         bool need_unlock;
    1483             : 
    1484      114088 :         nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) /
    1485             :                      PAGE_CACHE_SIZE, PAGE_CACHE_SIZE /
    1486             :                      (sizeof(struct page *)));
    1487      228176 :         nrptrs = min(nrptrs, current->nr_dirtied_pause - current->nr_dirtied);
    1488      114088 :         nrptrs = max(nrptrs, 8);
    1489      114088 :         pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
    1490      114088 :         if (!pages)
    1491             :                 return -ENOMEM;
    1492             : 
    1493             :         first_index = pos >> PAGE_CACHE_SHIFT;
    1494             : 
    1495      232116 :         while (iov_iter_count(i) > 0) {
    1496      118029 :                 size_t offset = pos & (PAGE_CACHE_SIZE - 1);
    1497      118029 :                 size_t write_bytes = min(iov_iter_count(i),
    1498             :                                          nrptrs * (size_t)PAGE_CACHE_SIZE -
    1499             :                                          offset);
    1500      236058 :                 size_t num_pages = (write_bytes + offset +
    1501      118029 :                                     PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
    1502             :                 size_t reserve_bytes;
    1503             :                 size_t dirty_pages;
    1504             :                 size_t copied;
    1505             : 
    1506      118029 :                 WARN_ON(num_pages > nrptrs);
    1507             : 
    1508             :                 /*
    1509             :                  * Fault pages before locking them in prepare_pages
    1510             :                  * to avoid recursive lock
     1511             :                  * to avoid a recursive lock
    1512      118029 :                 if (unlikely(iov_iter_fault_in_readable(i, write_bytes))) {
    1513             :                         ret = -EFAULT;
    1514           2 :                         break;
    1515             :                 }
    1516             : 
    1517      118029 :                 reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
    1518      118029 :                 ret = btrfs_check_data_free_space(inode, reserve_bytes);
    1519      118030 :                 if (ret == -ENOSPC &&
    1520           0 :                     (BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
    1521             :                                               BTRFS_INODE_PREALLOC))) {
    1522           0 :                         ret = check_can_nocow(inode, pos, &write_bytes);
    1523           0 :                         if (ret > 0) {
    1524             :                                 only_release_metadata = true;
    1525             :                                 /*
    1526             :                                  * our prealloc extent may be smaller than
    1527             :                                  * write_bytes, so scale down.
    1528             :                                  */
    1529           0 :                                 num_pages = (write_bytes + offset +
    1530           0 :                                              PAGE_CACHE_SIZE - 1) >>
    1531             :                                         PAGE_CACHE_SHIFT;
    1532           0 :                                 reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
    1533             :                                 ret = 0;
    1534             :                         } else {
    1535             :                                 ret = -ENOSPC;
    1536             :                         }
    1537             :                 }
    1538             : 
    1539      118030 :                 if (ret)
    1540             :                         break;
    1541             : 
    1542      118030 :                 ret = btrfs_delalloc_reserve_metadata(inode, reserve_bytes);
    1543      118030 :                 if (ret) {
    1544           2 :                         if (!only_release_metadata)
    1545           2 :                                 btrfs_free_reserved_data_space(inode,
    1546             :                                                                reserve_bytes);
    1547             :                         else
    1548           0 :                                 btrfs_end_nocow_write(root);
    1549             :                         break;
    1550             :                 }
    1551             : 
    1552             :                 release_bytes = reserve_bytes;
    1553             :                 need_unlock = false;
    1554             : again:
    1555             :                 /*
    1556             :                  * This is going to setup the pages array with the number of
    1557             :                  * pages we want, so we don't really need to worry about the
    1558             :                  * contents of pages from loop to loop
    1559             :                  */
    1560      118036 :                 ret = prepare_pages(inode, pages, num_pages,
    1561             :                                     pos, write_bytes,
    1562             :                                     force_page_uptodate);
    1563      118036 :                 if (ret)
    1564             :                         break;
    1565             : 
    1566      118036 :                 ret = lock_and_cleanup_extent_if_need(inode, pages, num_pages,
    1567             :                                                       pos, &lockstart, &lockend,
    1568             :                                                       &cached_state);
    1569      118036 :                 if (ret < 0) {
    1570           8 :                         if (ret == -EAGAIN)
    1571             :                                 goto again;
    1572             :                         break;
    1573      118028 :                 } else if (ret > 0) {
    1574             :                         need_unlock = true;
    1575             :                         ret = 0;
    1576             :                 }
    1577             : 
    1578      118028 :                 copied = btrfs_copy_from_user(pos, num_pages,
    1579             :                                            write_bytes, pages, i);
    1580             : 
    1581             :                 /*
    1582             :                  * if we have trouble faulting in the pages, fall
    1583             :                  * back to one page at a time
    1584             :                  */
    1585      118028 :                 if (copied < write_bytes)
    1586             :                         nrptrs = 1;
    1587             : 
    1588      118028 :                 if (copied == 0) {
    1589             :                         force_page_uptodate = true;
    1590             :                         dirty_pages = 0;
    1591             :                 } else {
    1592             :                         force_page_uptodate = false;
    1593      236056 :                         dirty_pages = (copied + offset +
    1594      118028 :                                        PAGE_CACHE_SIZE - 1) >>
    1595             :                                        PAGE_CACHE_SHIFT;
    1596             :                 }
    1597             : 
    1598             :                 /*
     1599             :                  * If we had a short copy we need to release the excess delalloc
    1600             :                  * bytes we reserved.  We need to increment outstanding_extents
    1601             :                  * because btrfs_delalloc_release_space will decrement it, but
    1602             :                  * we still have an outstanding extent for the chunk we actually
    1603             :                  * managed to copy.
    1604             :                  */
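                     :                 /*
                     :                  * For example, assuming 4K pages (PAGE_CACHE_SIZE is
                     :                  * architecture dependent): if 4 pages were reserved but
                     :                  * the copy faulted after filling only 2, the remaining
                     :                  * (4 - 2) << PAGE_CACHE_SHIFT = 8K of the reservation is
                     :                  * given back below.
                     :                  */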
    1605      118028 :                 if (num_pages > dirty_pages) {
    1606           0 :                         release_bytes = (num_pages - dirty_pages) <<
    1607             :                                 PAGE_CACHE_SHIFT;
    1608           0 :                         if (copied > 0) {
    1609             :                                 spin_lock(&BTRFS_I(inode)->lock);
    1610           0 :                                 BTRFS_I(inode)->outstanding_extents++;
    1611             :                                 spin_unlock(&BTRFS_I(inode)->lock);
    1612             :                         }
    1613           0 :                         if (only_release_metadata)
    1614           0 :                                 btrfs_delalloc_release_metadata(inode,
    1615             :                                                                 release_bytes);
    1616             :                         else
    1617           0 :                                 btrfs_delalloc_release_space(inode,
    1618             :                                                              release_bytes);
    1619             :                 }
    1620             : 
    1621      118028 :                 release_bytes = dirty_pages << PAGE_CACHE_SHIFT;
    1622             : 
    1623      118028 :                 if (copied > 0)
    1624      118028 :                         ret = btrfs_dirty_pages(root, inode, pages,
    1625             :                                                 dirty_pages, pos, copied,
    1626             :                                                 NULL);
    1627      118028 :                 if (need_unlock)
    1628       35812 :                         unlock_extent_cached(&BTRFS_I(inode)->io_tree,
    1629             :                                              lockstart, lockend, &cached_state,
    1630             :                                              GFP_NOFS);
    1631      118028 :                 if (ret) {
    1632           0 :                         btrfs_drop_pages(pages, num_pages);
    1633           0 :                         break;
    1634             :                 }
    1635             : 
    1636             :                 release_bytes = 0;
    1637      118028 :                 if (only_release_metadata)
    1638           0 :                         btrfs_end_nocow_write(root);
    1639             : 
    1640      118028 :                 if (only_release_metadata && copied > 0) {
    1641           0 :                         u64 lockstart = round_down(pos, root->sectorsize);
    1642           0 :                         u64 lockend = lockstart +
    1643             :                                 (dirty_pages << PAGE_CACHE_SHIFT) - 1;
    1644             : 
    1645           0 :                         set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
    1646             :                                        lockend, EXTENT_NORESERVE, NULL,
    1647             :                                        NULL, GFP_NOFS);
    1648             :                         only_release_metadata = false;
    1649             :                 }
    1650             : 
    1651      118028 :                 btrfs_drop_pages(pages, num_pages);
    1652             : 
    1653      118028 :                 cond_resched();
    1654             : 
    1655      118028 :                 balance_dirty_pages_ratelimited(inode->i_mapping);
    1656      118028 :                 if (dirty_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
    1657       45643 :                         btrfs_btree_balance_dirty(root);
    1658             : 
    1659      118028 :                 pos += copied;
    1660      118028 :                 num_written += copied;
    1661             :         }
    1662             : 
    1663      114089 :         kfree(pages);
    1664             : 
    1665      114088 :         if (release_bytes) {
    1666           0 :                 if (only_release_metadata) {
    1667           0 :                         btrfs_end_nocow_write(root);
    1668           0 :                         btrfs_delalloc_release_metadata(inode, release_bytes);
    1669             :                 } else {
    1670           0 :                         btrfs_delalloc_release_space(inode, release_bytes);
    1671             :                 }
    1672             :         }
    1673             : 
    1674      114088 :         return num_written ? num_written : ret;
    1675             : }
    1676             : 
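                     : /*
                     :  * O_DIRECT path: try generic_file_direct_write() first; if it wrote
                     :  * less than requested, fall back to __btrfs_buffered_write() for the
                     :  * remainder, then write back and invalidate that range of the page
                     :  * cache so the file looks as if the whole write had been direct.
                     :  */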
    1677       25270 : static ssize_t __btrfs_direct_write(struct kiocb *iocb,
    1678       25259 :                                     struct iov_iter *from,
    1679             :                                     loff_t pos)
    1680             : {
    1681       25270 :         struct file *file = iocb->ki_filp;
    1682             :         ssize_t written;
    1683             :         ssize_t written_buffered;
    1684             :         loff_t endbyte;
    1685             :         int err;
    1686             : 
    1687       25270 :         written = generic_file_direct_write(iocb, from, pos);
    1688             : 
    1689       50528 :         if (written < 0 || !iov_iter_count(from))
    1690             :                 return written;
    1691             : 
    1692           0 :         pos += written;
    1693           0 :         written_buffered = __btrfs_buffered_write(file, from, pos);
    1694           0 :         if (written_buffered < 0) {
    1695           0 :                 err = written_buffered;
    1696           0 :                 goto out;
    1697             :         }
    1698           0 :         endbyte = pos + written_buffered - 1;
    1699           0 :         err = filemap_write_and_wait_range(file->f_mapping, pos, endbyte);
    1700           0 :         if (err)
    1701             :                 goto out;
    1702           0 :         written += written_buffered;
    1703           0 :         iocb->ki_pos = pos + written_buffered;
    1704           0 :         invalidate_mapping_pages(file->f_mapping, pos >> PAGE_CACHE_SHIFT,
    1705           0 :                                  endbyte >> PAGE_CACHE_SHIFT);
    1706             : out:
    1707           0 :         return written ? written : err;
    1708             : }
    1709             : 
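                     : /*
                     :  * Update mtime/ctime (and i_version) for a write unless the inode is
                     :  * marked NOCMTIME; each timestamp is only written when it actually
                     :  * differs from the current time.
                     :  */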
    1710      139356 : static void update_time_for_write(struct inode *inode)
    1711             : {
    1712             :         struct timespec now;
    1713             : 
    1714      139356 :         if (IS_NOCMTIME(inode))
    1715      139358 :                 return;
    1716             : 
    1717      139356 :         now = current_fs_time(inode->i_sb);
    1718      139355 :         if (!timespec_equal(&inode->i_mtime, &now))
    1719       58966 :                 inode->i_mtime = now;
    1720             : 
    1721      139355 :         if (!timespec_equal(&inode->i_ctime, &now))
    1722       58890 :                 inode->i_ctime = now;
    1723             : 
    1724      139355 :         if (IS_I_VERSION(inode))
    1725             :                 inode_inc_iversion(inode);
    1726             : }
    1727             : 
    1728      139379 : static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
    1729      139379 :                                     struct iov_iter *from)
    1730             : {
    1731      139379 :         struct file *file = iocb->ki_filp;
    1732      139356 :         struct inode *inode = file_inode(file);
    1733      139379 :         struct btrfs_root *root = BTRFS_I(inode)->root;
    1734             :         u64 start_pos;
    1735             :         u64 end_pos;
    1736             :         ssize_t num_written = 0;
    1737             :         ssize_t err = 0;
    1738      139379 :         size_t count = iov_iter_count(from);
    1739      139379 :         bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
    1740      139379 :         loff_t pos = iocb->ki_pos;
    1741             : 
    1742      139379 :         mutex_lock(&inode->i_mutex);
    1743             : 
    1744      139383 :         current->backing_dev_info = inode->i_mapping->backing_dev_info;
    1745      139383 :         err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
    1746      139382 :         if (err) {
    1747           0 :                 mutex_unlock(&inode->i_mutex);
    1748           0 :                 goto out;
    1749             :         }
    1750             : 
    1751      139382 :         if (count == 0) {
    1752          26 :                 mutex_unlock(&inode->i_mutex);
    1753          26 :                 goto out;
    1754             :         }
    1755             : 
    1756             :         iov_iter_truncate(from, count);
    1757             : 
    1758      139356 :         err = file_remove_suid(file);
    1759      139357 :         if (err) {
    1760           0 :                 mutex_unlock(&inode->i_mutex);
    1761           0 :                 goto out;
    1762             :         }
    1763             : 
    1764             :         /*
    1765             :          * If BTRFS flips readonly due to some impossible error
    1766             :          * (fs_info->fs_state now has BTRFS_SUPER_FLAG_ERROR),
     1767             :          * even though we have opened the file as writable, we have
    1768             :          * to stop this write operation to ensure FS consistency.
    1769             :          */
    1770      278714 :         if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) {
    1771           0 :                 mutex_unlock(&inode->i_mutex);
    1772             :                 err = -EROFS;
    1773           0 :                 goto out;
    1774             :         }
    1775             : 
    1776             :         /*
    1777             :          * We reserve space for updating the inode when we reserve space for the
    1778             :          * extent we are going to write, so we will enospc out there.  We don't
    1779             :          * need to start yet another transaction to update the inode as we will
    1780             :          * update the inode when we finish writing whatever data we write.
    1781             :          */
    1782      139357 :         update_time_for_write(inode);
    1783             : 
    1784      139356 :         start_pos = round_down(pos, root->sectorsize);
    1785      139356 :         if (start_pos > i_size_read(inode)) {
    1786             :                 /* Expand hole size to cover write data, preventing empty gap */
    1787        3075 :                 end_pos = round_up(pos + count, root->sectorsize);
    1788        3075 :                 err = btrfs_cont_expand(inode, i_size_read(inode), end_pos);
    1789        3075 :                 if (err) {
    1790           0 :                         mutex_unlock(&inode->i_mutex);
    1791           0 :                         goto out;
    1792             :                 }
    1793             :         }
    1794             : 
    1795      139356 :         if (sync)
    1796        1095 :                 atomic_inc(&BTRFS_I(inode)->sync_writers);
    1797             : 
    1798      139358 :         if (unlikely(file->f_flags & O_DIRECT)) {
    1799       25270 :                 num_written = __btrfs_direct_write(iocb, from, pos);
    1800             :         } else {
    1801      114088 :                 num_written = __btrfs_buffered_write(file, from, pos);
    1802      114088 :                 if (num_written > 0)
    1803      114087 :                         iocb->ki_pos = pos + num_written;
    1804             :         }
    1805             : 
    1806      139357 :         mutex_unlock(&inode->i_mutex);
    1807             : 
    1808             :         /*
    1809             :          * we want to make sure fsync finds this change
    1810             :          * but we haven't joined a transaction running right now.
    1811             :          *
    1812             :          * Later on, someone is sure to update the inode and get the
    1813             :          * real transid recorded.
    1814             :          *
    1815             :          * We set last_trans now to the fs_info generation + 1,
    1816             :          * this will either be one more than the running transaction
    1817             :          * or the generation used for the next transaction if there isn't
    1818             :          * one running right now.
    1819             :          *
    1820             :          * We also have to set last_sub_trans to the current log transid,
    1821             :          * otherwise subsequent syncs to a file that's been synced in this
     1822             :          * transaction will appear to have already occurred.
    1823             :          */
    1824      139358 :         BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
    1825      139358 :         BTRFS_I(inode)->last_sub_trans = root->log_transid;
    1826      139358 :         if (num_written > 0) {
    1827      139347 :                 err = generic_write_sync(file, pos, num_written);
    1828      139347 :                 if (err < 0)
    1829             :                         num_written = err;
    1830             :         }
    1831             : 
    1832      139358 :         if (sync)
    1833        1095 :                 atomic_dec(&BTRFS_I(inode)->sync_writers);
    1834             : out:
    1835      139383 :         current->backing_dev_info = NULL;
    1836      139383 :         return num_written ? num_written : err;
    1837             : }
    1838             : 
    1839      679049 : int btrfs_release_file(struct inode *inode, struct file *filp)
    1840             : {
    1841      679049 :         if (filp->private_data)
    1842           0 :                 btrfs_ioctl_trans_end(filp);
    1843             :         /*
     1844             :          * ordered_data_close is set by setattr when we are about to truncate
    1845             :          * a file from a non-zero size to a zero size.  This tries to
    1846             :          * flush down new bytes that may have been written if the
    1847             :          * application were using truncate to replace a file in place.
    1848             :          */
    1849      679078 :         if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
    1850      679049 :                                &BTRFS_I(inode)->runtime_flags))
    1851         476 :                         filemap_flush(inode->i_mapping);
    1852      679078 :         return 0;
    1853             : }
    1854             : 
    1855             : /*
    1856             :  * fsync call for both files and directories.  This logs the inode into
    1857             :  * the tree log instead of forcing full commits whenever possible.
    1858             :  *
    1859             :  * It needs to call filemap_fdatawait so that all ordered extent updates
    1860             :  * in the metadata btree are up to date for copying to the log.
    1861             :  *
    1862             :  * It drops the inode mutex before doing the tree log commit.  This is an
    1863             :  * important optimization for directories because holding the mutex prevents
    1864             :  * new operations on the dir while we write to disk.
    1865             :  */
    1866        2498 : int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
    1867             : {
    1868        2498 :         struct dentry *dentry = file->f_path.dentry;
    1869        2498 :         struct inode *inode = dentry->d_inode;
    1870        2498 :         struct btrfs_root *root = BTRFS_I(inode)->root;
    1871             :         struct btrfs_trans_handle *trans;
    1872             :         struct btrfs_log_ctx ctx;
    1873             :         int ret = 0;
    1874             :         bool full_sync = 0;
    1875             : 
    1876        2498 :         trace_btrfs_sync_file(file, datasync);
    1877             : 
    1878             :         /*
    1879             :          * We write the dirty pages in the range and wait until they complete
    1880             :          * outside of the ->i_mutex, so they can be flushed by multiple tasks
    1881             :          * in parallel, which improves performance.  See
    1882             :          * btrfs_wait_ordered_range for an explanation of the ASYNC check.
    1883             :          */
    1884        2498 :         atomic_inc(&BTRFS_I(inode)->sync_writers);
    1885        2498 :         ret = filemap_fdatawrite_range(inode->i_mapping, start, end);
    1886        4996 :         if (!ret && test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
    1887             :                              &BTRFS_I(inode)->runtime_flags))
    1888          47 :                 ret = filemap_fdatawrite_range(inode->i_mapping, start, end);
    1889             :         atomic_dec(&BTRFS_I(inode)->sync_writers);
    1890        2498 :         if (ret)
    1891             :                 return ret;
    1892             : 
    1893        2498 :         mutex_lock(&inode->i_mutex);
    1894             : 
    1895             :         /*
    1896             :          * We flush the dirty pages again to avoid leaving any dirty pages
    1897             :          * behind in the range.
    1898             :          */
    1899        2498 :         atomic_inc(&root->log_batch);
    1900             :         full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
    1901             :                              &BTRFS_I(inode)->runtime_flags);
    1902        2498 :         if (full_sync) {
    1903         989 :                 ret = btrfs_wait_ordered_range(inode, start, end - start + 1);
    1904         989 :                 if (ret) {
    1905           0 :                         mutex_unlock(&inode->i_mutex);
    1906           0 :                         goto out;
    1907             :                 }
    1908             :         }
    1909             :         atomic_inc(&root->log_batch);
    1910             : 
    1911             :         /*
    1912             :          * check the transaction that last modified this inode
    1913             :          * and see if it's already been committed
    1914             :          */
    1915        2498 :         if (!BTRFS_I(inode)->last_trans) {
    1916          88 :                 mutex_unlock(&inode->i_mutex);
    1917          88 :                 goto out;
    1918             :         }
    1919             : 
    1920             :         /*
    1921             :          * if the last transaction that changed this file was before
    1922             :          * the current transaction, we can bail out now without any
    1923             :          * syncing
    1924             :          */
    1925        2410 :         smp_mb();
    1926        7216 :         if (btrfs_inode_in_log(inode, root->fs_info->generation) ||
    1927        2396 :             BTRFS_I(inode)->last_trans <=
    1928        2396 :             root->fs_info->last_trans_committed) {
    1929         841 :                 BTRFS_I(inode)->last_trans = 0;
    1930             : 
    1931             :                 /*
    1932             :                  * We've had everything committed since the last time we were
    1933             :                  * modified so clear this flag in case it was set for whatever
    1934             :                  * reason, it's no longer relevant.
    1935             :                  */
    1936             :                 clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
    1937             :                           &BTRFS_I(inode)->runtime_flags);
    1938         841 :                 mutex_unlock(&inode->i_mutex);
    1939         841 :                 goto out;
    1940             :         }
    1941             : 
    1942             :         /*
    1943             :          * ok we haven't committed the transaction yet, let's do a commit
    1944             :          */
    1945        1569 :         if (file->private_data)
    1946           0 :                 btrfs_ioctl_trans_end(file);
    1947             : 
    1948             :         /*
    1949             :          * We use start here because we will need to wait on the IO to complete
    1950             :          * in btrfs_sync_log, which could require joining a transaction (for
    1951             :          * example checking cross references in the nocow path).  If we use join
    1952             :          * here we could get into a situation where we're waiting on IO to
    1953             :          * happen that is blocked on a transaction trying to commit.  With start
    1954             :          * we inc the extwriter counter, so we wait for all extwriters to exit
    1955             :          * before we start blocking join'ers.  This comment is to keep somebody
    1956             :          * from thinking they are super smart and changing this to
    1957             :          * btrfs_join_transaction *cough*Josef*cough*.
    1958             :          */
    1959        1569 :         trans = btrfs_start_transaction(root, 0);
    1960        1569 :         if (IS_ERR(trans)) {
    1961           0 :                 ret = PTR_ERR(trans);
    1962           0 :                 mutex_unlock(&inode->i_mutex);
    1963           0 :                 goto out;
    1964             :         }
    1965        1569 :         trans->sync = true;
    1966             : 
    1967             :         btrfs_init_log_ctx(&ctx);
    1968             : 
    1969        1569 :         ret = btrfs_log_dentry_safe(trans, root, dentry, start, end, &ctx);
    1970        1569 :         if (ret < 0) {
    1971             :                 /* Fallthrough and commit/free transaction. */
    1972             :                 ret = 1;
    1973             :         }
    1974             : 
    1975             :         /* we've logged all the items and now have a consistent
    1976             :          * version of the file in the log.  It is possible that
    1977             :          * someone will come in and modify the file, but that's
    1978             :          * fine because the log is consistent on disk, and we
    1979             :          * have references to all of the file's extents.
    1980             :          *
    1981             :          * It is possible that someone will come in and log the
    1982             :          * file again, but that will end up using the synchronization
    1983             :          * inside btrfs_sync_log to keep things safe.
    1984             :          */
    1985        1569 :         mutex_unlock(&inode->i_mutex);
    1986             : 
    1987        1569 :         if (ret != BTRFS_NO_LOG_SYNC) {
    1988        1569 :                 if (!ret) {
    1989        1483 :                         ret = btrfs_sync_log(trans, root, &ctx);
    1990        1483 :                         if (!ret) {
    1991        1481 :                                 ret = btrfs_end_transaction(trans, root);
    1992        1481 :                                 goto out;
    1993             :                         }
    1994             :                 }
    1995          88 :                 if (!full_sync) {
    1996          10 :                         ret = btrfs_wait_ordered_range(inode, start,
    1997          10 :                                                        end - start + 1);
    1998          10 :                         if (ret) {
    1999           0 :                                 btrfs_end_transaction(trans, root);
    2000           0 :                                 goto out;
    2001             :                         }
    2002             :                 }
    2003          88 :                 ret = btrfs_commit_transaction(trans, root);
    2004             :         } else {
    2005           0 :                 ret = btrfs_end_transaction(trans, root);
    2006             :         }
    2007             : out:
    2008        2498 :         return ret > 0 ? -EIO : ret;
    2009             : }
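/*
 * Illustration: a simplified, stand-alone restatement of the fast path in
 * btrfs_sync_file() above, using hypothetical stand-in structs rather than
 * the real btrfs_inode/fs_info types.  The real code additionally skips the
 * sync when the inode is already in the current log (btrfs_inode_in_log());
 * this sketch only shows the generation comparison.
 */
struct demo_inode {
        unsigned long long last_trans;           /* transid of the last change */
};

struct demo_fs_info {
        unsigned long long last_trans_committed; /* last fully committed transid */
};

/* Return nonzero when fsync can bail out without logging or committing. */
static int demo_fsync_can_skip(const struct demo_inode *inode,
                               const struct demo_fs_info *fs_info)
{
        /*
         * If the transaction that last changed this inode has already been
         * committed, everything it dirtied is safely on disk.
         */
        return inode->last_trans <= fs_info->last_trans_committed;
}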
    2010             : 
    2011             : static const struct vm_operations_struct btrfs_file_vm_ops = {
    2012             :         .fault          = filemap_fault,
    2013             :         .map_pages      = filemap_map_pages,
    2014             :         .page_mkwrite   = btrfs_page_mkwrite,
    2015             :         .remap_pages    = generic_file_remap_pages,
    2016             : };
    2017             : 
    2018     1214001 : static int btrfs_file_mmap(struct file  *filp, struct vm_area_struct *vma)
    2019             : {
    2020     1214001 :         struct address_space *mapping = filp->f_mapping;
    2021             : 
    2022     1214001 :         if (!mapping->a_ops->readpage)
    2023             :                 return -ENOEXEC;
    2024             : 
    2025             :         file_accessed(filp);
    2026     1214058 :         vma->vm_ops = &btrfs_file_vm_ops;
    2027             : 
    2028     1214058 :         return 0;
    2029             : }
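/*
 * Illustration: btrfs_file_mmap() only validates the mapping and installs
 * btrfs_file_vm_ops; the interesting work happens later in
 * btrfs_page_mkwrite() on the first write fault to a shared mapping.  A
 * minimal user-space sketch that triggers that path -- the path name is
 * illustrative and the file is assumed to be at least one page (4096 bytes)
 * long:
 */
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

static int dirty_one_mapped_page(const char *path)
{
        int fd = open(path, O_RDWR);
        char *p;

        if (fd < 0)
                return -1;
        p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (p == MAP_FAILED) {
                close(fd);
                return -1;
        }
        p[0] = 'x';                     /* first write fault -> ->page_mkwrite */
        msync(p, 4096, MS_SYNC);        /* write the dirtied page back */
        munmap(p, 4096);
        return close(fd);
}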
    2030             : 
    2031         192 : static int hole_mergeable(struct inode *inode, struct extent_buffer *leaf,
    2032             :                           int slot, u64 start, u64 end)
    2033             : {
    2034             :         struct btrfs_file_extent_item *fi;
    2035             :         struct btrfs_key key;
    2036             : 
    2037         192 :         if (slot < 0 || slot >= btrfs_header_nritems(leaf))
    2038             :                 return 0;
    2039             : 
    2040          96 :         btrfs_item_key_to_cpu(leaf, &key, slot);
    2041         278 :         if (key.objectid != btrfs_ino(inode) ||
    2042          86 :             key.type != BTRFS_EXTENT_DATA_KEY)
    2043             :                 return 0;
    2044             : 
    2045          86 :         fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
    2046             : 
    2047          86 :         if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
    2048             :                 return 0;
    2049             : 
    2050          65 :         if (btrfs_file_extent_disk_bytenr(leaf, fi))
    2051             :                 return 0;
    2052             : 
    2053          56 :         if (key.offset == end)
    2054             :                 return 1;
    2055          50 :         if (key.offset + btrfs_file_extent_num_bytes(leaf, fi) == start)
    2056             :                 return 1;
    2057           0 :         return 0;
    2058             : }
    2059             : 
    2060          73 : static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode,
    2061             :                       struct btrfs_path *path, u64 offset, u64 end)
    2062             : {
    2063          73 :         struct btrfs_root *root = BTRFS_I(inode)->root;
    2064             :         struct extent_buffer *leaf;
    2065             :         struct btrfs_file_extent_item *fi;
    2066             :         struct extent_map *hole_em;
    2067          73 :         struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
    2068             :         struct btrfs_key key;
    2069             :         int ret;
    2070             : 
    2071         146 :         if (btrfs_fs_incompat(root->fs_info, NO_HOLES))
    2072             :                 goto out;
    2073             : 
    2074          73 :         key.objectid = btrfs_ino(inode);
    2075          73 :         key.type = BTRFS_EXTENT_DATA_KEY;
    2076          73 :         key.offset = offset;
    2077             : 
    2078          73 :         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
    2079          73 :         if (ret < 0)
    2080             :                 return ret;
    2081          73 :         BUG_ON(!ret);
    2082             : 
    2083          73 :         leaf = path->nodes[0];
    2084          73 :         if (hole_mergeable(inode, leaf, path->slots[0]-1, offset, end)) {
    2085             :                 u64 num_bytes;
    2086             : 
    2087          50 :                 path->slots[0]--;
    2088          50 :                 fi = btrfs_item_ptr(leaf, path->slots[0],
    2089             :                                     struct btrfs_file_extent_item);
    2090          50 :                 num_bytes = btrfs_file_extent_num_bytes(leaf, fi) +
    2091             :                         end - offset;
    2092             :                 btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
    2093             :                 btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
    2094             :                 btrfs_set_file_extent_offset(leaf, fi, 0);
    2095          50 :                 btrfs_mark_buffer_dirty(leaf);
    2096          50 :                 goto out;
    2097             :         }
    2098             : 
    2099          23 :         if (hole_mergeable(inode, leaf, path->slots[0], offset, end)) {
    2100             :                 u64 num_bytes;
    2101             : 
    2102           6 :                 key.offset = offset;
    2103           6 :                 btrfs_set_item_key_safe(root, path, &key);
    2104          12 :                 fi = btrfs_item_ptr(leaf, path->slots[0],
    2105             :                                     struct btrfs_file_extent_item);
    2106           6 :                 num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + end -
    2107             :                         offset;
    2108             :                 btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
    2109             :                 btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
    2110             :                 btrfs_set_file_extent_offset(leaf, fi, 0);
    2111           6 :                 btrfs_mark_buffer_dirty(leaf);
    2112           6 :                 goto out;
    2113             :         }
    2114          17 :         btrfs_release_path(path);
    2115             : 
    2116          34 :         ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), offset,
    2117             :                                        0, 0, end - offset, 0, end - offset,
    2118             :                                        0, 0, 0);
    2119          17 :         if (ret)
    2120             :                 return ret;
    2121             : 
    2122             : out:
    2123          73 :         btrfs_release_path(path);
    2124             : 
    2125          73 :         hole_em = alloc_extent_map();
    2126          73 :         if (!hole_em) {
    2127           0 :                 btrfs_drop_extent_cache(inode, offset, end - 1, 0);
    2128             :                 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
    2129             :                         &BTRFS_I(inode)->runtime_flags);
    2130             :         } else {
    2131          73 :                 hole_em->start = offset;
    2132          73 :                 hole_em->len = end - offset;
    2133          73 :                 hole_em->ram_bytes = hole_em->len;
    2134          73 :                 hole_em->orig_start = offset;
    2135             : 
    2136          73 :                 hole_em->block_start = EXTENT_MAP_HOLE;
    2137          73 :                 hole_em->block_len = 0;
    2138          73 :                 hole_em->orig_block_len = 0;
    2139          73 :                 hole_em->bdev = root->fs_info->fs_devices->latest_bdev;
    2140          73 :                 hole_em->compress_type = BTRFS_COMPRESS_NONE;
    2141          73 :                 hole_em->generation = trans->transid;
    2142             : 
    2143             :                 do {
    2144          73 :                         btrfs_drop_extent_cache(inode, offset, end - 1, 0);
    2145          73 :                         write_lock(&em_tree->lock);
    2146          73 :                         ret = add_extent_mapping(em_tree, hole_em, 1);
    2147             :                         write_unlock(&em_tree->lock);
    2148          73 :                 } while (ret == -EEXIST);
    2149          73 :                 free_extent_map(hole_em);
    2150          73 :                 if (ret)
    2151             :                         set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
    2152             :                                 &BTRFS_I(inode)->runtime_flags);
    2153             :         }
    2154             : 
    2155             :         return 0;
    2156             : }
    2157             : 
    2158             : /*
    2159             :  * Find a hole extent on the given inode and change start/len to the end of
    2160             :  * the hole extent (a hole/vacuum extent whose em->start <= start &&
    2161             :  *                  em->start + em->len > start).
    2162             :  * When a hole extent is found, return 1 and modify start/len.
    2163             :  */
    2164         514 : static int find_first_non_hole(struct inode *inode, u64 *start, u64 *len)
    2165             : {
    2166             :         struct extent_map *em;
    2167             :         int ret = 0;
    2168             : 
    2169         514 :         em = btrfs_get_extent(inode, NULL, 0, *start, *len, 0);
    2170         514 :         if (IS_ERR_OR_NULL(em)) {
    2171           0 :                 if (!em)
    2172             :                         ret = -ENOMEM;
    2173             :                 else
    2174           0 :                         ret = PTR_ERR(em);
    2175           0 :                 return ret;
    2176             :         }
    2177             : 
    2178             :         /* Hole or vacuum extent (only exists in no-holes mode) */
    2179         514 :         if (em->block_start == EXTENT_MAP_HOLE) {
    2180             :                 ret = 1;
    2181         788 :                 *len = em->start + em->len > *start + *len ?
    2182         394 :                        0 : *start + *len - em->start - em->len;
    2183         394 :                 *start = em->start + em->len;
    2184             :         }
    2185         514 :         free_extent_map(em);
    2186         514 :         return ret;
    2187             : }
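/*
 * Illustration: a stand-alone restatement of the hole-skipping arithmetic in
 * find_first_non_hole(), followed by a worked example with made-up numbers.
 * The function below uses plain unsigned long long values instead of the
 * extent_map returned by btrfs_get_extent().
 */
static void demo_skip_hole(unsigned long long hole_start,
                           unsigned long long hole_len,
                           unsigned long long *start, unsigned long long *len)
{
        unsigned long long hole_end = hole_start + hole_len;

        if (hole_end > *start + *len)
                *len = 0;               /* request lies entirely inside the hole */
        else
                *len = *start + *len - hole_end;
        *start = hole_end;              /* continue right after the hole */
}

/*
 * Example: asking about [start = 0, len = 16384) while a hole extent covers
 * [0, 4096) trims the request to [start = 4096, len = 12288); had the hole
 * covered 16384 bytes or more, len would become 0 and the caller would treat
 * the whole request as already inside a large hole.
 */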
    2188             : 
    2189         359 : static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
    2190             : {
    2191         830 :         struct btrfs_root *root = BTRFS_I(inode)->root;
    2192         359 :         struct extent_state *cached_state = NULL;
    2193             :         struct btrfs_path *path;
    2194             :         struct btrfs_block_rsv *rsv;
    2195             :         struct btrfs_trans_handle *trans;
    2196             :         u64 lockstart;
    2197             :         u64 lockend;
    2198             :         u64 tail_start;
    2199             :         u64 tail_len;
    2200         359 :         u64 orig_start = offset;
    2201             :         u64 cur_offset;
    2202             :         u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
    2203             :         u64 drop_end;
    2204             :         int ret = 0;
    2205             :         int err = 0;
    2206             :         int rsv_count;
    2207             :         bool same_page;
    2208         359 :         bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
    2209             :         u64 ino_size;
    2210             : 
    2211         359 :         ret = btrfs_wait_ordered_range(inode, offset, len);
    2212         359 :         if (ret)
    2213             :                 return ret;
    2214             : 
    2215         359 :         mutex_lock(&inode->i_mutex);
    2216         359 :         ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE);
    2217         359 :         ret = find_first_non_hole(inode, &offset, &len);
    2218         359 :         if (ret < 0)
    2219             :                 goto out_only_mutex;
    2220         359 :         if (ret && !len) {
    2221             :                 /* Already in a large hole */
    2222             :                 ret = 0;
    2223             :                 goto out_only_mutex;
    2224             :         }
    2225             : 
    2226         113 :         lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize);
    2227         226 :         lockend = round_down(offset + len,
    2228         113 :                              BTRFS_I(inode)->root->sectorsize) - 1;
    2229         113 :         same_page = ((offset >> PAGE_CACHE_SHIFT) ==
    2230         113 :                     ((offset + len - 1) >> PAGE_CACHE_SHIFT));
    2231             : 
    2232             :         /*
    2233             :          * We needn't truncate any page which is beyond the end of the file
    2234             :          * because we are sure there is no data there.
    2235             :          */
    2236             :         /*
    2237             :          * Only do this if we are in the same page and we aren't doing the
    2238             :          * entire page.
    2239             :          */
    2240         113 :         if (same_page && len < PAGE_CACHE_SIZE) {
    2241           1 :                 if (offset < ino_size)
    2242           0 :                         ret = btrfs_truncate_page(inode, offset, len, 0);
    2243             :                 goto out_only_mutex;
    2244             :         }
    2245             : 
    2246             :         /* zero the back part of the first page */
    2247         112 :         if (offset < ino_size) {
    2248          74 :                 ret = btrfs_truncate_page(inode, offset, 0, 0);
    2249          74 :                 if (ret) {
    2250           0 :                         mutex_unlock(&inode->i_mutex);
    2251           0 :                         return ret;
    2252             :                 }
    2253             :         }
    2254             : 
    2255             :         /* Check the aligned pages after the first unaligned page.
    2256             :          * If offset != orig_start, the first unaligned page and
    2257             :          * several following pages are already holes, so the extra
    2258             :          * check can be skipped. */
    2259         112 :         if (offset == orig_start) {
    2260             :                 /* after truncate page, check hole again */
    2261          44 :                 len = offset + len - lockstart;
    2262          44 :                 offset = lockstart;
    2263          44 :                 ret = find_first_non_hole(inode, &offset, &len);
    2264          44 :                 if (ret < 0)
    2265             :                         goto out_only_mutex;
    2266          44 :                 if (ret && !len) {
    2267             :                         ret = 0;
    2268             :                         goto out_only_mutex;
    2269             :                 }
    2270          44 :                 lockstart = offset;
    2271             :         }
    2272             : 
    2273             :         /* Check the tail unaligned part is in a hole */
    2274             :         /* Check whether the unaligned tail part is in a hole */
    2275         112 :         tail_len = offset + len - tail_start;
    2276         112 :         if (tail_len) {
    2277         111 :                 ret = find_first_non_hole(inode, &tail_start, &tail_len);
    2278         111 :                 if (unlikely(ret < 0))
    2279             :                         goto out_only_mutex;
    2280         111 :                 if (!ret) {
    2281             :                         /* zero the front end of the last page */
    2282          34 :                         if (tail_start + tail_len < ino_size) {
    2283          18 :                                 ret = btrfs_truncate_page(inode,
    2284             :                                                 tail_start + tail_len, 0, 1);
    2285          18 :                                 if (ret)
    2286             :                                         goto out_only_mutex;
    2287             :                         }
    2288             :                 }
    2289             :         }
    2290             : 
    2291         112 :         if (lockend < lockstart) {
    2292           0 :                 mutex_unlock(&inode->i_mutex);
    2293           0 :                 return 0;
    2294             :         }
    2295             : 
    2296             :         while (1) {
    2297             :                 struct btrfs_ordered_extent *ordered;
    2298             : 
    2299         112 :                 truncate_pagecache_range(inode, lockstart, lockend);
    2300             : 
    2301         112 :                 lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
    2302             :                                  0, &cached_state);
    2303         112 :                 ordered = btrfs_lookup_first_ordered_extent(inode, lockend);
    2304             : 
    2305             :                 /*
    2306             :                  * We need to make sure we have no ordered extents in this range
    2307             :                  * and nobody raced in and read a page in this range; if they
    2308             :                  * did, we need to try again.
    2309             :                  */
    2310         112 :                 if ((!ordered ||
    2311           0 :                     (ordered->file_offset + ordered->len <= lockstart ||
    2312         112 :                      ordered->file_offset > lockend)) &&
    2313         112 :                      !btrfs_page_exists_in_range(inode, lockstart, lockend)) {
    2314         112 :                         if (ordered)
    2315           0 :                                 btrfs_put_ordered_extent(ordered);
    2316             :                         break;
    2317             :                 }
    2318           0 :                 if (ordered)
    2319           0 :                         btrfs_put_ordered_extent(ordered);
    2320           0 :                 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
    2321             :                                      lockend, &cached_state, GFP_NOFS);
    2322           0 :                 ret = btrfs_wait_ordered_range(inode, lockstart,
    2323           0 :                                                lockend - lockstart + 1);
    2324           0 :                 if (ret) {
    2325           0 :                         mutex_unlock(&inode->i_mutex);
    2326           0 :                         return ret;
    2327             :                 }
    2328             :         }
    2329             : 
    2330         112 :         path = btrfs_alloc_path();
    2331         112 :         if (!path) {
    2332             :                 ret = -ENOMEM;
    2333             :                 goto out;
    2334             :         }
    2335             : 
    2336         112 :         rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP);
    2337         112 :         if (!rsv) {
    2338             :                 ret = -ENOMEM;
    2339             :                 goto out_free;
    2340             :         }
    2341         112 :         rsv->size = btrfs_calc_trunc_metadata_size(root, 1);
    2342         112 :         rsv->failfast = 1;
    2343             : 
    2344             :         /*
    2345             :          * 1 - update the inode
    2346             :          * 1 - remove the extents in the range
    2347             :          * 1 - add the hole extent if no_holes isn't set
    2348             :          */
    2349         112 :         rsv_count = no_holes ? 2 : 3;
    2350         112 :         trans = btrfs_start_transaction(root, rsv_count);
    2351         112 :         if (IS_ERR(trans)) {
    2352           0 :                 err = PTR_ERR(trans);
    2353           0 :                 goto out_free;
    2354             :         }
    2355             : 
    2356         112 :         ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv,
    2357             :                                       min_size);
    2358         112 :         BUG_ON(ret);
    2359         112 :         trans->block_rsv = rsv;
    2360             : 
    2361         112 :         cur_offset = lockstart;
    2362         112 :         len = lockend - cur_offset;
    2363         224 :         while (cur_offset < lockend) {
    2364         112 :                 ret = __btrfs_drop_extents(trans, root, inode, path,
    2365             :                                            cur_offset, lockend + 1,
    2366             :                                            &drop_end, 1, 0, 0, NULL);
    2367         112 :                 if (ret != -ENOSPC)
    2368             :                         break;
    2369             : 
    2370           0 :                 trans->block_rsv = &root->fs_info->trans_block_rsv;
    2371             : 
    2372           0 :                 if (cur_offset < ino_size) {
    2373           0 :                         ret = fill_holes(trans, inode, path, cur_offset,
    2374             :                                          drop_end);
    2375           0 :                         if (ret) {
    2376             :                                 err = ret;
    2377             :                                 break;
    2378             :                         }
    2379             :                 }
    2380             : 
    2381           0 :                 cur_offset = drop_end;
    2382             : 
    2383           0 :                 ret = btrfs_update_inode(trans, root, inode);
    2384           0 :                 if (ret) {
    2385             :                         err = ret;
    2386             :                         break;
    2387             :                 }
    2388             : 
    2389           0 :                 btrfs_end_transaction(trans, root);
    2390           0 :                 btrfs_btree_balance_dirty(root);
    2391             : 
    2392           0 :                 trans = btrfs_start_transaction(root, rsv_count);
    2393           0 :                 if (IS_ERR(trans)) {
    2394           0 :                         ret = PTR_ERR(trans);
    2395             :                         trans = NULL;
    2396           0 :                         break;
    2397             :                 }
    2398             : 
    2399           0 :                 ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv,
    2400             :                                               rsv, min_size);
    2401           0 :                 BUG_ON(ret);    /* shouldn't happen */
    2402           0 :                 trans->block_rsv = rsv;
    2403             : 
    2404           0 :                 ret = find_first_non_hole(inode, &cur_offset, &len);
    2405           0 :                 if (unlikely(ret < 0))
    2406             :                         break;
    2407           0 :                 if (ret && !len) {
    2408             :                         ret = 0;
    2409             :                         break;
    2410             :                 }
    2411             :         }
    2412             : 
    2413         112 :         if (ret) {
    2414             :                 err = ret;
    2415             :                 goto out_trans;
    2416             :         }
    2417             : 
    2418         112 :         trans->block_rsv = &root->fs_info->trans_block_rsv;
    2419             :         /*
    2420             :          * Don't insert file hole extent item if it's for a range beyond eof
    2421             :          * (because it's useless) or if it represents a 0-byte range (when
    2422             :          * cur_offset == drop_end).
    2423             :          */
    2424         112 :         if (cur_offset < ino_size && cur_offset < drop_end) {
    2425          73 :                 ret = fill_holes(trans, inode, path, cur_offset, drop_end);
    2426          73 :                 if (ret) {
    2427             :                         err = ret;
    2428           0 :                         goto out_trans;
    2429             :                 }
    2430             :         }
    2431             : 
    2432             : out_trans:
    2433         112 :         if (!trans)
    2434             :                 goto out_free;
    2435             : 
    2436             :         inode_inc_iversion(inode);
    2437         112 :         inode->i_mtime = inode->i_ctime = CURRENT_TIME;
    2438             : 
    2439         112 :         trans->block_rsv = &root->fs_info->trans_block_rsv;
    2440         112 :         ret = btrfs_update_inode(trans, root, inode);
    2441         112 :         btrfs_end_transaction(trans, root);
    2442         112 :         btrfs_btree_balance_dirty(root);
    2443             : out_free:
    2444         112 :         btrfs_free_path(path);
    2445         112 :         btrfs_free_block_rsv(root, rsv);
    2446             : out:
    2447         112 :         unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
    2448             :                              &cached_state, GFP_NOFS);
    2449             : out_only_mutex:
    2450         359 :         mutex_unlock(&inode->i_mutex);
    2451         359 :         if (ret && !err)
    2452             :                 err = ret;
    2453         359 :         return err;
    2454             : }
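/*
 * Illustration: btrfs_punch_hole() is reached from btrfs_fallocate() below
 * when user space asks for FALLOC_FL_PUNCH_HOLE.  A minimal user-space
 * sketch -- the path, offset and length are illustrative, fallocate(2)
 * requires PUNCH_HOLE to be combined with KEEP_SIZE, and the FALLOC_FL_*
 * flags are assumed to be exposed by <fcntl.h> under _GNU_SOURCE:
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>

static int punch_second_mebibyte(const char *path)
{
        int fd = open(path, O_RDWR);
        int ret;

        if (fd < 0)
                return -1;
        /* Drop the data in [1 MiB, 2 MiB) without changing i_size. */
        ret = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                        1024 * 1024, 1024 * 1024);
        close(fd);
        return ret;
}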
    2455             : 
    2456        3730 : static long btrfs_fallocate(struct file *file, int mode,
    2457             :                             loff_t offset, loff_t len)
    2458             : {
    2459             :         struct inode *inode = file_inode(file);
    2460        3730 :         struct extent_state *cached_state = NULL;
    2461        3730 :         struct btrfs_root *root = BTRFS_I(inode)->root;
    2462             :         u64 cur_offset;
    2463             :         u64 last_byte;
    2464             :         u64 alloc_start;
    2465             :         u64 alloc_end;
    2466        3730 :         u64 alloc_hint = 0;
    2467             :         u64 locked_end;
    2468      225822 :         struct extent_map *em;
    2469        3730 :         int blocksize = BTRFS_I(inode)->root->sectorsize;
    2470             :         int ret;
    2471             : 
    2472        3730 :         alloc_start = round_down(offset, blocksize);
    2473        3730 :         alloc_end = round_up(offset + len, blocksize);
    2474             : 
    2475             :         /* Make sure we aren't being give some crap mode */
    2476        3730 :         if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
    2477             :                 return -EOPNOTSUPP;
    2478             : 
    2479        2664 :         if (mode & FALLOC_FL_PUNCH_HOLE)
    2480         359 :                 return btrfs_punch_hole(inode, offset, len);
    2481             : 
    2482             :         /*
    2483             :          * Make sure we have enough space before we do the
    2484             :          * allocation.
    2485             :          */
    2486        2305 :         ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start);
    2487        2305 :         if (ret)
    2488           0 :                 return ret;
    2489        2305 :         if (root->fs_info->quota_enabled) {
    2490         194 :                 ret = btrfs_qgroup_reserve(root, alloc_end - alloc_start);
    2491         194 :                 if (ret)
    2492             :                         goto out_reserve_fail;
    2493             :         }
    2494             : 
    2495        2305 :         mutex_lock(&inode->i_mutex);
    2496        2305 :         ret = inode_newsize_ok(inode, alloc_end);
    2497        2305 :         if (ret)
    2498             :                 goto out;
    2499             : 
    2500        2305 :         if (alloc_start > inode->i_size) {
    2501         920 :                 ret = btrfs_cont_expand(inode, i_size_read(inode),
    2502             :                                         alloc_start);
    2503         920 :                 if (ret)
    2504             :                         goto out;
    2505             :         } else {
    2506             :                 /*
    2507             :                  * If we are fallocating from the end of the file onward we
    2508             :                  * need to zero out the end of the page if i_size lands in the
    2509             :                  * middle of a page.
    2510             :                  */
    2511        1385 :                 ret = btrfs_truncate_page(inode, inode->i_size, 0, 0);
    2512        1385 :                 if (ret)
    2513             :                         goto out;
    2514             :         }
    2515             : 
    2516             :         /*
    2517             :          * wait for ordered IO before we have any locks.  We'll loop again
    2518             :          * below with the locks held.
    2519             :          */
    2520        2305 :         ret = btrfs_wait_ordered_range(inode, alloc_start,
    2521             :                                        alloc_end - alloc_start);
    2522        2305 :         if (ret)
    2523             :                 goto out;
    2524             : 
    2525        2305 :         locked_end = alloc_end - 1;
    2526             :         while (1) {
    2527             :                 struct btrfs_ordered_extent *ordered;
    2528             : 
    2529             :                 /* the extent lock is ordered inside the running
    2530             :                  * transaction
    2531             :                  */
    2532        2305 :                 lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start,
    2533             :                                  locked_end, 0, &cached_state);
    2534        2305 :                 ordered = btrfs_lookup_first_ordered_extent(inode,
    2535             :                                                             alloc_end - 1);
    2536        2308 :                 if (ordered &&
    2537           3 :                     ordered->file_offset + ordered->len > alloc_start &&
    2538             :                     ordered->file_offset < alloc_end) {
    2539           0 :                         btrfs_put_ordered_extent(ordered);
    2540           0 :                         unlock_extent_cached(&BTRFS_I(inode)->io_tree,
    2541             :                                              alloc_start, locked_end,
    2542             :                                              &cached_state, GFP_NOFS);
    2543             :                         /*
    2544             :                          * we can't wait on the range with the transaction
    2545             :                          * running or with the extent lock held
    2546             :                          */
    2547           0 :                         ret = btrfs_wait_ordered_range(inode, alloc_start,
    2548             :                                                        alloc_end - alloc_start);
    2549           0 :                         if (ret)
    2550             :                                 goto out;
    2551             :                 } else {
    2552        2305 :                         if (ordered)
    2553           3 :                                 btrfs_put_ordered_extent(ordered);
    2554             :                         break;
    2555             :                 }
    2556             :         }
    2557             : 
    2558             :         cur_offset = alloc_start;
    2559             :         while (1) {
    2560             :                 u64 actual_end;
    2561             : 
    2562      225822 :                 em = btrfs_get_extent(inode, NULL, 0, cur_offset,
    2563             :                                       alloc_end - cur_offset, 0);
    2564      225822 :                 if (IS_ERR_OR_NULL(em)) {
    2565           0 :                         if (!em)
    2566             :                                 ret = -ENOMEM;
    2567             :                         else
    2568           0 :                                 ret = PTR_ERR(em);
    2569             :                         break;
    2570             :                 }
    2571      225822 :                 last_byte = min(extent_map_end(em), alloc_end);
    2572      225822 :                 actual_end = min_t(u64, extent_map_end(em), offset + len);
    2573      225822 :                 last_byte = ALIGN(last_byte, blocksize);
    2574             : 
    2575      449058 :                 if (em->block_start == EXTENT_MAP_HOLE ||
    2576      223374 :                     (cur_offset >= inode->i_size &&
    2577             :                      !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
    2578        2586 :                         ret = btrfs_prealloc_file_range(inode, mode, cur_offset,
    2579             :                                                         last_byte - cur_offset,
    2580        2586 :                                                         1 << inode->i_blkbits,
    2581             :                                                         offset + len,
    2582             :                                                         &alloc_hint);
    2583             : 
    2584        2586 :                         if (ret < 0) {
    2585           0 :                                 free_extent_map(em);
    2586           0 :                                 break;
    2587             :                         }
    2588      223597 :                 } else if (actual_end > inode->i_size &&
    2589         361 :                            !(mode & FALLOC_FL_KEEP_SIZE)) {
    2590             :                         /*
    2591             :                          * We didn't need to allocate any more space, but we
    2592             :                          * still extended the size of the file so we need to
    2593             :                          * update i_size.
    2594             :                          */
    2595          93 :                         inode->i_ctime = CURRENT_TIME;
    2596          93 :                         i_size_write(inode, actual_end);
    2597          93 :                         btrfs_ordered_update_i_size(inode, actual_end, NULL);
    2598             :                 }
    2599      225822 :                 free_extent_map(em);
    2600             : 
    2601             :                 cur_offset = last_byte;
    2602      225822 :                 if (cur_offset >= alloc_end) {
    2603             :                         ret = 0;
    2604             :                         break;
    2605             :                 }
    2606             :         }
    2607        2305 :         unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
    2608             :                              &cached_state, GFP_NOFS);
    2609             : out:
    2610        2305 :         mutex_unlock(&inode->i_mutex);
    2611        2305 :         if (root->fs_info->quota_enabled)
    2612         194 :                 btrfs_qgroup_free(root, alloc_end - alloc_start);
    2613             : out_reserve_fail:
    2614             :         /* Let go of our reservation. */
    2615        2305 :         btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
    2616        2305 :         return ret;
    2617             : }
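/*
 * Illustration: with this kernel only mode 0 and FALLOC_FL_KEEP_SIZE make it
 * to the preallocation loop above (PUNCH_HOLE is routed to
 * btrfs_punch_hole() and anything else gets -EOPNOTSUPP).  A user-space
 * sketch with an illustrative path and sizes; the flag is again assumed to
 * come from <fcntl.h> under _GNU_SOURCE:
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>

static int preallocate_demo(const char *path)
{
        int fd = open(path, O_RDWR | O_CREAT, 0644);
        int ret;

        if (fd < 0)
                return -1;
        /* mode 0: allocate 8 MiB and let i_size grow to cover it */
        ret = fallocate(fd, 0, 0, 8 * 1024 * 1024);
        if (!ret)
                /* KEEP_SIZE: reserve 8 MiB past EOF without touching i_size */
                ret = fallocate(fd, FALLOC_FL_KEEP_SIZE,
                                8 * 1024 * 1024, 8 * 1024 * 1024);
        close(fd);
        return ret;
}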
    2618             : 
    2619        1482 : static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
    2620             : {
    2621         741 :         struct btrfs_root *root = BTRFS_I(inode)->root;
    2622             :         struct extent_map *em = NULL;
    2623         741 :         struct extent_state *cached_state = NULL;
    2624         741 :         u64 lockstart = *offset;
    2625         741 :         u64 lockend = i_size_read(inode);
    2626             :         u64 start = *offset;
    2627             :         u64 len = i_size_read(inode);
    2628             :         int ret = 0;
    2629             : 
    2630         741 :         lockend = max_t(u64, root->sectorsize, lockend);
    2631         741 :         if (lockend <= lockstart)
    2632           0 :                 lockend = lockstart + root->sectorsize;
    2633             : 
    2634         741 :         lockend--;
    2635         741 :         len = lockend - lockstart + 1;
    2636             : 
    2637         741 :         len = max_t(u64, len, root->sectorsize);
    2638         741 :         if (inode->i_size == 0)
    2639             :                 return -ENXIO;
    2640             : 
    2641         741 :         lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, 0,
    2642             :                          &cached_state);
    2643             : 
    2644        1540 :         while (start < inode->i_size) {
    2645         795 :                 em = btrfs_get_extent_fiemap(inode, NULL, 0, start, len, 0);
    2646         795 :                 if (IS_ERR(em)) {
    2647           0 :                         ret = PTR_ERR(em);
    2648             :                         em = NULL;
    2649           0 :                         break;
    2650             :                 }
    2651             : 
    2652         797 :                 if (whence == SEEK_HOLE &&
    2653           3 :                     (em->block_start == EXTENT_MAP_HOLE ||
    2654             :                      test_bit(EXTENT_FLAG_PREALLOC, &em->flags)))
    2655             :                         break;
    2656        1587 :                 else if (whence == SEEK_DATA &&
    2657        1540 :                            (em->block_start != EXTENT_MAP_HOLE &&
    2658             :                             !test_bit(EXTENT_FLAG_PREALLOC, &em->flags)))
    2659             :                         break;
    2660             : 
    2661          58 :                 start = em->start + em->len;
    2662          58 :                 free_extent_map(em);
    2663             :                 em = NULL;
    2664          58 :                 cond_resched();
    2665             :         }
    2666         741 :         free_extent_map(em);
    2667         741 :         if (!ret) {
    2668         741 :                 if (whence == SEEK_DATA && start >= inode->i_size)
    2669             :                         ret = -ENXIO;
    2670             :                 else
    2671         737 :                         *offset = min_t(loff_t, start, inode->i_size);
    2672             :         }
    2673         741 :         unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
    2674             :                              &cached_state, GFP_NOFS);
    2675         741 :         return ret;
    2676             : }
    2677             : 
    2678       13319 : static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence)
    2679             : {
    2680       14236 :         struct inode *inode = file->f_mapping->host;
    2681             :         int ret;
    2682             : 
    2683       13319 :         mutex_lock(&inode->i_mutex);
    2684       13319 :         switch (whence) {
    2685             :         case SEEK_END:
    2686             :         case SEEK_CUR:
    2687        7950 :                 offset = generic_file_llseek(file, offset, whence);
    2688        7950 :                 goto out;
    2689             :         case SEEK_DATA:
    2690             :         case SEEK_HOLE:
    2691         917 :                 if (offset >= i_size_read(inode)) {
    2692         176 :                         mutex_unlock(&inode->i_mutex);
    2693         176 :                         return -ENXIO;
    2694             :                 }
    2695             : 
    2696         741 :                 ret = find_desired_extent(inode, &offset, whence);
    2697         741 :                 if (ret) {
    2698           4 :                         mutex_unlock(&inode->i_mutex);
    2699           4 :                         return ret;
    2700             :                 }
    2701             :         }
    2702             : 
    2703        5189 :         offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
    2704             : out:
    2705       13139 :         mutex_unlock(&inode->i_mutex);
    2706       13139 :         return offset;
    2707             : }
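/*
 * Illustration: the SEEK_DATA/SEEK_HOLE cases above back the lseek(2)
 * sparse-file interface.  A user-space sketch that walks every data region
 * of a (possibly sparse) file; the path is illustrative and SEEK_DATA /
 * SEEK_HOLE are assumed to be exposed by <unistd.h> under _GNU_SOURCE:
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static void list_data_regions(const char *path)
{
        int fd = open(path, O_RDONLY);
        off_t data, hole;

        if (fd < 0)
                return;
        data = lseek(fd, 0, SEEK_DATA);
        while (data >= 0) {
                /*
                 * Every data region is terminated by a hole; past the last
                 * data byte SEEK_HOLE reports a virtual hole at EOF.
                 */
                hole = lseek(fd, data, SEEK_HOLE);
                if (hole < 0)
                        break;
                printf("data: [%lld, %lld)\n",
                       (long long)data, (long long)hole);
                data = lseek(fd, hole, SEEK_DATA); /* fails with ENXIO after the last region */
        }
        close(fd);
}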
    2708             : 
    2709             : const struct file_operations btrfs_file_operations = {
    2710             :         .llseek         = btrfs_file_llseek,
    2711             :         .read           = new_sync_read,
    2712             :         .write          = new_sync_write,
    2713             :         .read_iter      = generic_file_read_iter,
    2714             :         .splice_read    = generic_file_splice_read,
    2715             :         .write_iter     = btrfs_file_write_iter,
    2716             :         .mmap           = btrfs_file_mmap,
    2717             :         .open           = generic_file_open,
    2718             :         .release        = btrfs_release_file,
    2719             :         .fsync          = btrfs_sync_file,
    2720             :         .fallocate      = btrfs_fallocate,
    2721             :         .unlocked_ioctl = btrfs_ioctl,
    2722             : #ifdef CONFIG_COMPAT
    2723             :         .compat_ioctl   = btrfs_ioctl,
    2724             : #endif
    2725             : };
    2726             : 
    2727           0 : void btrfs_auto_defrag_exit(void)
    2728             : {
    2729           0 :         if (btrfs_inode_defrag_cachep)
    2730           0 :                 kmem_cache_destroy(btrfs_inode_defrag_cachep);
    2731           0 : }
    2732             : 
    2733           0 : int btrfs_auto_defrag_init(void)
    2734             : {
    2735           0 :         btrfs_inode_defrag_cachep = kmem_cache_create("btrfs_inode_defrag",
    2736             :                                         sizeof(struct inode_defrag), 0,
    2737             :                                         SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
    2738             :                                         NULL);
    2739           0 :         if (!btrfs_inode_defrag_cachep)
    2740             :                 return -ENOMEM;
    2741             : 
    2742           0 :         return 0;
    2743             : }

Generated by: LCOV version 1.10