LCOV - code coverage report
Current view: top level - fs/btrfs - ordered-data.c (source / functions) Hit Total Coverage
Test: btrfstest.info Lines: 339 386 87.8 %
Date: 2014-11-28 Functions: 26 30 86.7 %

          Line data    Source code
       1             : /*
       2             :  * Copyright (C) 2007 Oracle.  All rights reserved.
       3             :  *
       4             :  * This program is free software; you can redistribute it and/or
       5             :  * modify it under the terms of the GNU General Public
       6             :  * License v2 as published by the Free Software Foundation.
       7             :  *
       8             :  * This program is distributed in the hope that it will be useful,
       9             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      10             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      11             :  * General Public License for more details.
      12             :  *
      13             :  * You should have received a copy of the GNU General Public
      14             :  * License along with this program; if not, write to the
      15             :  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
      16             :  * Boston, MA 021110-1307, USA.
      17             :  */
      18             : 
      19             : #include <linux/slab.h>
      20             : #include <linux/blkdev.h>
      21             : #include <linux/writeback.h>
      22             : #include <linux/pagevec.h>
      23             : #include "ctree.h"
      24             : #include "transaction.h"
      25             : #include "btrfs_inode.h"
      26             : #include "extent_io.h"
      27             : #include "disk-io.h"
      28             : 
      29             : static struct kmem_cache *btrfs_ordered_extent_cache;
      30             : 
      31             : static u64 entry_end(struct btrfs_ordered_extent *entry)
      32             : {
      33      257359 :         if (entry->file_offset + entry->len < entry->file_offset)
      34             :                 return (u64)-1;
      35             :         return entry->file_offset + entry->len;
      36             : }
      37             : 
      38             : /* returns NULL if the insertion worked, or it returns the node it did find
      39             :  * in the tree
      40             :  */
      41       51501 : static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset,
      42             :                                    struct rb_node *node)
      43             : {
      44       51501 :         struct rb_node **p = &root->rb_node;
      45             :         struct rb_node *parent = NULL;
      46       70981 :         struct btrfs_ordered_extent *entry;
      47             : 
      48      175100 :         while (*p) {
      49             :                 parent = *p;
      50             :                 entry = rb_entry(parent, struct btrfs_ordered_extent, rb_node);
      51             : 
      52       72097 :                 if (file_offset < entry->file_offset)
      53        1116 :                         p = &(*p)->rb_left;
      54       70981 :                 else if (file_offset >= entry_end(entry))
      55       70982 :                         p = &(*p)->rb_right;
      56             :                 else
      57             :                         return parent;
      58             :         }
      59             : 
      60             :         rb_link_node(node, parent, p);
      61       51502 :         rb_insert_color(node, root);
      62       51500 :         return NULL;
      63             : }
      64             : 
      65           0 : static void ordered_data_tree_panic(struct inode *inode, int errno,
      66             :                                                u64 offset)
      67             : {
      68           0 :         struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
      69           0 :         btrfs_panic(fs_info, errno, "Inconsistency in ordered tree at offset "
      70             :                     "%llu", offset);
      71             : }
      72             : 
      73             : /*
      74             :  * look for a given offset in the tree, and if it can't be found return the
      75             :  * first lesser offset
      76             :  */
      77     2178179 : static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset,
      78             :                                      struct rb_node **prev_ret)
      79             : {
      80     2178179 :         struct rb_node *n = root->rb_node;
      81             :         struct rb_node *prev = NULL;
      82             :         struct rb_node *test;
      83      124452 :         struct btrfs_ordered_extent *entry;
      84       10174 :         struct btrfs_ordered_extent *prev_entry = NULL;
      85             : 
      86     4446273 :         while (n) {
      87      152192 :                 entry = rb_entry(n, struct btrfs_ordered_extent, rb_node);
      88             :                 prev = n;
      89             :                 prev_entry = entry;
      90             : 
      91      152192 :                 if (file_offset < entry->file_offset)
      92       27740 :                         n = n->rb_left;
      93      124452 :                 else if (file_offset >= entry_end(entry))
      94       62175 :                         n = n->rb_right;
      95             :                 else
      96             :                         return n;
      97             :         }
      98     2115902 :         if (!prev_ret)
      99             :                 return NULL;
     100             : 
     101     2120983 :         while (prev && file_offset >= entry_end(prev_entry)) {
     102        5008 :                 test = rb_next(prev);
     103        5008 :                 if (!test)
     104             :                         break;
     105           2 :                 prev_entry = rb_entry(test, struct btrfs_ordered_extent,
     106             :                                       rb_node);
     107           2 :                 if (file_offset < entry_end(prev_entry))
     108             :                         break;
     109             : 
     110             :                 prev = test;
     111             :         }
     112     2115905 :         if (prev)
     113        5079 :                 prev_entry = rb_entry(prev, struct btrfs_ordered_extent,
     114             :                                       rb_node);
     115     2121014 :         while (prev && file_offset < entry_end(prev_entry)) {
     116          71 :                 test = rb_prev(prev);
     117          71 :                 if (!test)
     118             :                         break;
     119          15 :                 prev_entry = rb_entry(test, struct btrfs_ordered_extent,
     120             :                                       rb_node);
     121             :                 prev = test;
     122             :         }
     123     2115905 :         *prev_ret = prev;
     124     2115905 :         return NULL;
     125             : }
     126             : 
     127             : /*
     128             :  * helper to check if a given offset is inside a given entry
     129             :  */
     130             : static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset)
     131             : {
     132     5626158 :         if (file_offset < entry->file_offset ||
     133     2811806 :             entry->file_offset + entry->len <= file_offset)
     134             :                 return 0;
     135             :         return 1;
     136             : }
     137             : 
     138             : static int range_overlaps(struct btrfs_ordered_extent *entry, u64 file_offset,
     139             :                           u64 len)
     140             : {
     141        4418 :         if (file_offset + len <= entry->file_offset ||
     142        2203 :             entry->file_offset + entry->len <= file_offset)
     143             :                 return 0;
     144             :         return 1;
     145             : }
     146             : 
     147             : /*
     148             :  * look find the first ordered struct that has this offset, otherwise
     149             :  * the first one less than this offset
     150             :  */
     151     3540964 : static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
     152             :                                           u64 file_offset)
     153             : {
     154     3540964 :         struct rb_root *root = &tree->tree;
     155     3540964 :         struct rb_node *prev = NULL;
     156             :         struct rb_node *ret;
     157     1389046 :         struct btrfs_ordered_extent *entry;
     158             : 
     159     3540964 :         if (tree->last) {
     160             :                 entry = rb_entry(tree->last, struct btrfs_ordered_extent,
     161             :                                  rb_node);
     162     1389046 :                 if (offset_in_entry(entry, file_offset))
     163             :                         return tree->last;
     164             :         }
     165     2178117 :         ret = __tree_search(root, file_offset, &prev);
     166     2178175 :         if (!ret)
     167     2115908 :                 ret = prev;
     168     2178175 :         if (ret)
     169       67358 :                 tree->last = ret;
     170     2178175 :         return ret;
     171             : }
     172             : 
     173             : /* allocate and add a new ordered_extent into the per-inode tree.
     174             :  * file_offset is the logical offset in the file
     175             :  *
     176             :  * start is the disk block number of an extent already reserved in the
     177             :  * extent allocation tree
     178             :  *
     179             :  * len is the length of the extent
     180             :  *
     181             :  * The tree is given a single reference on the ordered extent that was
     182             :  * inserted.
     183             :  */
     184       51494 : static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
     185             :                                       u64 start, u64 len, u64 disk_len,
     186             :                                       int type, int dio, int compress_type)
     187             : {
     188       51494 :         struct btrfs_root *root = BTRFS_I(inode)->root;
     189             :         struct btrfs_ordered_inode_tree *tree;
     190             :         struct rb_node *node;
     191             :         struct btrfs_ordered_extent *entry;
     192             : 
     193             :         tree = &BTRFS_I(inode)->ordered_tree;
     194       51494 :         entry = kmem_cache_zalloc(btrfs_ordered_extent_cache, GFP_NOFS);
     195       51497 :         if (!entry)
     196             :                 return -ENOMEM;
     197             : 
     198       51498 :         entry->file_offset = file_offset;
     199       51498 :         entry->start = start;
     200       51498 :         entry->len = len;
     201       51498 :         if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) &&
     202             :             !(type == BTRFS_ORDERED_NOCOW))
     203       47440 :                 entry->csum_bytes_left = disk_len;
     204       51498 :         entry->disk_len = disk_len;
     205       51498 :         entry->bytes_left = len;
     206       51498 :         entry->inode = igrab(inode);
     207       51505 :         entry->compress_type = compress_type;
     208       51505 :         entry->truncated_len = (u64)-1;
     209       51505 :         if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE)
     210        5860 :                 set_bit(type, &entry->flags);
     211             : 
     212       51503 :         if (dio)
     213             :                 set_bit(BTRFS_ORDERED_DIRECT, &entry->flags);
     214             : 
     215             :         /* one ref for the tree */
     216             :         atomic_set(&entry->refs, 1);
     217       51503 :         init_waitqueue_head(&entry->wait);
     218       51500 :         INIT_LIST_HEAD(&entry->list);
     219       51500 :         INIT_LIST_HEAD(&entry->root_extent_list);
     220       51500 :         INIT_LIST_HEAD(&entry->work_list);
     221             :         init_completion(&entry->completion);
     222       51498 :         INIT_LIST_HEAD(&entry->log_list);
     223             : 
     224       51498 :         trace_btrfs_ordered_extent_add(inode, entry);
     225             : 
     226             :         spin_lock_irq(&tree->lock);
     227       51501 :         node = tree_insert(&tree->tree, file_offset,
     228             :                            &entry->rb_node);
     229       51499 :         if (node)
     230           0 :                 ordered_data_tree_panic(inode, -EEXIST, file_offset);
     231             :         spin_unlock_irq(&tree->lock);
     232             : 
     233             :         spin_lock(&root->ordered_extent_lock);
     234       51503 :         list_add_tail(&entry->root_extent_list,
     235             :                       &root->ordered_extents);
     236       51503 :         root->nr_ordered_extents++;
     237       51503 :         if (root->nr_ordered_extents == 1) {
     238       30682 :                 spin_lock(&root->fs_info->ordered_root_lock);
     239       61370 :                 BUG_ON(!list_empty(&root->ordered_root));
     240       30685 :                 list_add_tail(&root->ordered_root,
     241       30685 :                               &root->fs_info->ordered_roots);
     242       30685 :                 spin_unlock(&root->fs_info->ordered_root_lock);
     243             :         }
     244             :         spin_unlock(&root->ordered_extent_lock);
     245             : 
     246       51504 :         return 0;
     247             : }
     248             : 
     249       26091 : int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
     250             :                              u64 start, u64 len, u64 disk_len, int type)
     251             : {
     252       26091 :         return __btrfs_add_ordered_extent(inode, file_offset, start, len,
     253             :                                           disk_len, type, 0,
     254             :                                           BTRFS_COMPRESS_NONE);
     255             : }
     256             : 
     257       25252 : int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
     258             :                                  u64 start, u64 len, u64 disk_len, int type)
     259             : {
     260       25252 :         return __btrfs_add_ordered_extent(inode, file_offset, start, len,
     261             :                                           disk_len, type, 1,
     262             :                                           BTRFS_COMPRESS_NONE);
     263             : }
     264             : 
     265         153 : int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset,
     266             :                                       u64 start, u64 len, u64 disk_len,
     267             :                                       int type, int compress_type)
     268             : {
     269         153 :         return __btrfs_add_ordered_extent(inode, file_offset, start, len,
     270             :                                           disk_len, type, 0,
     271             :                                           compress_type);
     272             : }
     273             : 
     274             : /*
     275             :  * Add a struct btrfs_ordered_sum into the list of checksums to be inserted
     276             :  * when an ordered extent is finished.  If the list covers more than one
     277             :  * ordered extent, it is split across multiples.
     278             :  */
     279       90839 : void btrfs_add_ordered_sum(struct inode *inode,
     280             :                            struct btrfs_ordered_extent *entry,
     281             :                            struct btrfs_ordered_sum *sum)
     282             : {
     283             :         struct btrfs_ordered_inode_tree *tree;
     284             : 
     285             :         tree = &BTRFS_I(inode)->ordered_tree;
     286             :         spin_lock_irq(&tree->lock);
     287       90884 :         list_add_tail(&sum->list, &entry->list);
     288       90883 :         WARN_ON(entry->csum_bytes_left < sum->len);
     289       90882 :         entry->csum_bytes_left -= sum->len;
     290       90882 :         if (entry->csum_bytes_left == 0)
     291       47444 :                 wake_up(&entry->wait);
     292             :         spin_unlock_irq(&tree->lock);
     293       90876 : }
     294             : 
     295             : /*
     296             :  * this is used to account for finished IO across a given range
     297             :  * of the file.  The IO may span ordered extents.  If
     298             :  * a given ordered_extent is completely done, 1 is returned, otherwise
     299             :  * 0.
     300             :  *
     301             :  * test_and_set_bit on a flag in the struct btrfs_ordered_extent is used
     302             :  * to make sure this function only returns 1 once for a given ordered extent.
     303             :  *
     304             :  * file_offset is updated to one byte past the range that is recorded as
     305             :  * complete.  This allows you to walk forward in the file.
     306             :  */
     307       25260 : int btrfs_dec_test_first_ordered_pending(struct inode *inode,
     308             :                                    struct btrfs_ordered_extent **cached,
     309             :                                    u64 *file_offset, u64 io_size, int uptodate)
     310             : {
     311             :         struct btrfs_ordered_inode_tree *tree;
     312             :         struct rb_node *node;
     313       25260 :         struct btrfs_ordered_extent *entry = NULL;
     314             :         int ret;
     315             :         unsigned long flags;
     316             :         u64 dec_end;
     317             :         u64 dec_start;
     318             :         u64 to_dec;
     319             : 
     320       25260 :         tree = &BTRFS_I(inode)->ordered_tree;
     321       25260 :         spin_lock_irqsave(&tree->lock, flags);
     322       25260 :         node = tree_search(tree, *file_offset);
     323       25260 :         if (!node) {
     324             :                 ret = 1;
     325             :                 goto out;
     326             :         }
     327             : 
     328       25260 :         entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
     329       50520 :         if (!offset_in_entry(entry, *file_offset)) {
     330             :                 ret = 1;
     331             :                 goto out;
     332             :         }
     333             : 
     334       25260 :         dec_start = max(*file_offset, entry->file_offset);
     335       25260 :         dec_end = min(*file_offset + io_size, entry->file_offset +
     336             :                       entry->len);
     337       25260 :         *file_offset = dec_end;
     338       25260 :         if (dec_start > dec_end) {
     339           0 :                 btrfs_crit(BTRFS_I(inode)->root->fs_info,
     340             :                         "bad ordering dec_start %llu end %llu", dec_start, dec_end);
     341             :         }
     342       25258 :         to_dec = dec_end - dec_start;
     343       25258 :         if (to_dec > entry->bytes_left) {
     344           0 :                 btrfs_crit(BTRFS_I(inode)->root->fs_info,
     345             :                         "bad ordered accounting left %llu size %llu",
     346             :                         entry->bytes_left, to_dec);
     347             :         }
     348       25258 :         entry->bytes_left -= to_dec;
     349       25258 :         if (!uptodate)
     350             :                 set_bit(BTRFS_ORDERED_IOERR, &entry->flags);
     351             : 
     352       25258 :         if (entry->bytes_left == 0) {
     353       25259 :                 ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
     354       25260 :                 if (waitqueue_active(&entry->wait))
     355           0 :                         wake_up(&entry->wait);
     356             :         } else {
     357             :                 ret = 1;
     358             :         }
     359             : out:
     360       25259 :         if (!ret && cached && entry) {
     361       25259 :                 *cached = entry;
     362       25259 :                 atomic_inc(&entry->refs);
     363             :         }
     364             :         spin_unlock_irqrestore(&tree->lock, flags);
     365       25260 :         return ret == 0;
     366             : }
     367             : 
     368             : /*
     369             :  * this is used to account for finished IO across a given range
     370             :  * of the file.  The IO should not span ordered extents.  If
     371             :  * a given ordered_extent is completely done, 1 is returned, otherwise
     372             :  * 0.
     373             :  *
     374             :  * test_and_set_bit on a flag in the struct btrfs_ordered_extent is used
     375             :  * to make sure this function only returns 1 once for a given ordered extent.
     376             :  */
     377     1309063 : int btrfs_dec_test_ordered_pending(struct inode *inode,
     378             :                                    struct btrfs_ordered_extent **cached,
     379             :                                    u64 file_offset, u64 io_size, int uptodate)
     380             : {
     381             :         struct btrfs_ordered_inode_tree *tree;
     382             :         struct rb_node *node;
     383     1309068 :         struct btrfs_ordered_extent *entry = NULL;
     384             :         unsigned long flags;
     385             :         int ret;
     386             : 
     387     1309063 :         tree = &BTRFS_I(inode)->ordered_tree;
     388     1309063 :         spin_lock_irqsave(&tree->lock, flags);
     389     1309071 :         if (cached && *cached) {
     390             :                 entry = *cached;
     391             :                 goto have_entry;
     392             :         }
     393             : 
     394     1309068 :         node = tree_search(tree, file_offset);
     395     1309065 :         if (!node) {
     396             :                 ret = 1;
     397             :                 goto out;
     398             :         }
     399             : 
     400     1309065 :         entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
     401             : have_entry:
     402     1309068 :         if (!offset_in_entry(entry, file_offset)) {
     403             :                 ret = 1;
     404             :                 goto out;
     405             :         }
     406             : 
     407     1309068 :         if (io_size > entry->bytes_left) {
     408           0 :                 btrfs_crit(BTRFS_I(inode)->root->fs_info,
     409             :                            "bad ordered accounting left %llu size %llu",
     410             :                        entry->bytes_left, io_size);
     411             :         }
     412     1309069 :         entry->bytes_left -= io_size;
     413     1309069 :         if (!uptodate)
     414             :                 set_bit(BTRFS_ORDERED_IOERR, &entry->flags);
     415             : 
     416     1309069 :         if (entry->bytes_left == 0) {
     417       26245 :                 ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
     418       26245 :                 if (waitqueue_active(&entry->wait))
     419         152 :                         wake_up(&entry->wait);
     420             :         } else {
     421             :                 ret = 1;
     422             :         }
     423             : out:
     424     1309069 :         if (!ret && cached && entry) {
     425       26245 :                 *cached = entry;
     426       26245 :                 atomic_inc(&entry->refs);
     427             :         }
     428             :         spin_unlock_irqrestore(&tree->lock, flags);
     429     1309066 :         return ret == 0;
     430             : }
     431             : 
     432             : /* Needs to either be called under a log transaction or the log_mutex */
     433        1824 : void btrfs_get_logged_extents(struct inode *inode,
     434             :                               struct list_head *logged_list)
     435             : {
     436             :         struct btrfs_ordered_inode_tree *tree;
     437             :         struct btrfs_ordered_extent *ordered;
     438             :         struct rb_node *n;
     439             : 
     440             :         tree = &BTRFS_I(inode)->ordered_tree;
     441             :         spin_lock_irq(&tree->lock);
     442        2999 :         for (n = rb_first(&tree->tree); n; n = rb_next(n)) {
     443             :                 ordered = rb_entry(n, struct btrfs_ordered_extent, rb_node);
     444        2350 :                 if (!list_empty(&ordered->log_list))
     445           0 :                         continue;
     446             :                 list_add_tail(&ordered->log_list, logged_list);
     447        1175 :                 atomic_inc(&ordered->refs);
     448             :         }
     449             :         spin_unlock_irq(&tree->lock);
     450        1824 : }
     451             : 
     452           0 : void btrfs_put_logged_extents(struct list_head *logged_list)
     453             : {
     454             :         struct btrfs_ordered_extent *ordered;
     455             : 
     456           0 :         while (!list_empty(logged_list)) {
     457           0 :                 ordered = list_first_entry(logged_list,
     458             :                                            struct btrfs_ordered_extent,
     459             :                                            log_list);
     460           0 :                 list_del_init(&ordered->log_list);
     461           0 :                 btrfs_put_ordered_extent(ordered);
     462             :         }
     463           0 : }
     464             : 
     465        1824 : void btrfs_submit_logged_extents(struct list_head *logged_list,
     466             :                                  struct btrfs_root *log)
     467             : {
     468        1824 :         int index = log->log_transid % 2;
     469             : 
     470        1824 :         spin_lock_irq(&log->log_extents_lock[index]);
     471        1824 :         list_splice_tail(logged_list, &log->logged_list[index]);
     472             :         spin_unlock_irq(&log->log_extents_lock[index]);
     473        1824 : }
     474             : 
     475        1477 : void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid)
     476             : {
     477             :         struct btrfs_ordered_extent *ordered;
     478        1477 :         int index = transid % 2;
     479             : 
     480        1477 :         spin_lock_irq(&log->log_extents_lock[index]);
     481        5304 :         while (!list_empty(&log->logged_list[index])) {
     482        1175 :                 ordered = list_first_entry(&log->logged_list[index],
     483             :                                            struct btrfs_ordered_extent,
     484             :                                            log_list);
     485        1175 :                 list_del_init(&ordered->log_list);
     486             :                 spin_unlock_irq(&log->log_extents_lock[index]);
     487             : 
     488        1175 :                 if (!test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags) &&
     489             :                     !test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) {
     490           0 :                         struct inode *inode = ordered->inode;
     491           0 :                         u64 start = ordered->file_offset;
     492           0 :                         u64 end = ordered->file_offset + ordered->len - 1;
     493             : 
     494           0 :                         WARN_ON(!inode);
     495           0 :                         filemap_fdatawrite_range(inode->i_mapping, start, end);
     496             :                 }
     497        1175 :                 wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE,
     498             :                                                    &ordered->flags));
     499             : 
     500        1175 :                 btrfs_put_ordered_extent(ordered);
     501             :                 spin_lock_irq(&log->log_extents_lock[index]);
     502             :         }
     503             :         spin_unlock_irq(&log->log_extents_lock[index]);
     504        1477 : }
     505             : 
     506         898 : void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid)
     507             : {
     508             :         struct btrfs_ordered_extent *ordered;
     509         898 :         int index = transid % 2;
     510             : 
     511         898 :         spin_lock_irq(&log->log_extents_lock[index]);
     512        1796 :         while (!list_empty(&log->logged_list[index])) {
     513           0 :                 ordered = list_first_entry(&log->logged_list[index],
     514             :                                            struct btrfs_ordered_extent,
     515             :                                            log_list);
     516           0 :                 list_del_init(&ordered->log_list);
     517             :                 spin_unlock_irq(&log->log_extents_lock[index]);
     518           0 :                 btrfs_put_ordered_extent(ordered);
     519             :                 spin_lock_irq(&log->log_extents_lock[index]);
     520             :         }
     521             :         spin_unlock_irq(&log->log_extents_lock[index]);
     522         898 : }
     523             : 
     524             : /*
     525             :  * used to drop a reference on an ordered extent.  This will free
     526             :  * the extent if the last reference is dropped
     527             :  */
     528      198897 : void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
     529             : {
     530             :         struct list_head *cur;
     531             :         struct btrfs_ordered_sum *sum;
     532             : 
     533      198897 :         trace_btrfs_ordered_extent_put(entry->inode, entry);
     534             : 
     535      397816 :         if (atomic_dec_and_test(&entry->refs)) {
     536       51505 :                 if (entry->inode)
     537       51505 :                         btrfs_add_delayed_iput(entry->inode);
     538      284776 :                 while (!list_empty(&entry->list)) {
     539             :                         cur = entry->list.next;
     540       90883 :                         sum = list_entry(cur, struct btrfs_ordered_sum, list);
     541       90883 :                         list_del(&sum->list);
     542       90883 :                         kfree(sum);
     543             :                 }
     544       51505 :                 kmem_cache_free(btrfs_ordered_extent_cache, entry);
     545             :         }
     546      198925 : }
     547             : 
     548             : /*
     549             :  * remove an ordered extent from the tree.  No references are dropped
     550             :  * and waiters are woken up.
     551             :  */
     552       51505 : void btrfs_remove_ordered_extent(struct inode *inode,
     553             :                                  struct btrfs_ordered_extent *entry)
     554             : {
     555             :         struct btrfs_ordered_inode_tree *tree;
     556       51505 :         struct btrfs_root *root = BTRFS_I(inode)->root;
     557             :         struct rb_node *node;
     558             : 
     559             :         tree = &BTRFS_I(inode)->ordered_tree;
     560             :         spin_lock_irq(&tree->lock);
     561       51505 :         node = &entry->rb_node;
     562       51505 :         rb_erase(node, &tree->tree);
     563       51505 :         if (tree->last == node)
     564       41164 :                 tree->last = NULL;
     565             :         set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
     566             :         spin_unlock_irq(&tree->lock);
     567             : 
     568             :         spin_lock(&root->ordered_extent_lock);
     569       51505 :         list_del_init(&entry->root_extent_list);
     570       51505 :         root->nr_ordered_extents--;
     571             : 
     572       51505 :         trace_btrfs_ordered_extent_remove(inode, entry);
     573             : 
     574       51505 :         if (!root->nr_ordered_extents) {
     575       30685 :                 spin_lock(&root->fs_info->ordered_root_lock);
     576       61370 :                 BUG_ON(list_empty(&root->ordered_root));
     577             :                 list_del_init(&root->ordered_root);
     578       30685 :                 spin_unlock(&root->fs_info->ordered_root_lock);
     579             :         }
     580             :         spin_unlock(&root->ordered_extent_lock);
     581       51505 :         wake_up(&entry->wait);
     582       51504 : }
     583             : 
     584        1053 : static void btrfs_run_ordered_extent_work(struct btrfs_work *work)
     585             : {
     586             :         struct btrfs_ordered_extent *ordered;
     587             : 
     588        1053 :         ordered = container_of(work, struct btrfs_ordered_extent, flush_work);
     589        1053 :         btrfs_start_ordered_extent(ordered->inode, ordered, 1);
     590        1049 :         complete(&ordered->completion);
     591        1052 : }
     592             : 
     593             : /*
     594             :  * wait for all the ordered extents in a root.  This is done when balancing
     595             :  * space between drives.
     596             :  */
     597         193 : int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr)
     598             : {
     599             :         struct list_head splice, works;
     600             :         struct btrfs_ordered_extent *ordered, *next;
     601             :         int count = 0;
     602             : 
     603             :         INIT_LIST_HEAD(&splice);
     604             :         INIT_LIST_HEAD(&works);
     605             : 
     606         193 :         mutex_lock(&root->ordered_extent_mutex);
     607             :         spin_lock(&root->ordered_extent_lock);
     608         193 :         list_splice_init(&root->ordered_extents, &splice);
     609        1246 :         while (!list_empty(&splice) && nr) {
     610             :                 ordered = list_first_entry(&splice, struct btrfs_ordered_extent,
     611             :                                            root_extent_list);
     612        1053 :                 list_move_tail(&ordered->root_extent_list,
     613             :                                &root->ordered_extents);
     614        1053 :                 atomic_inc(&ordered->refs);
     615             :                 spin_unlock(&root->ordered_extent_lock);
     616             : 
     617        1053 :                 btrfs_init_work(&ordered->flush_work,
     618             :                                 btrfs_flush_delalloc_helper,
     619             :                                 btrfs_run_ordered_extent_work, NULL, NULL);
     620        1053 :                 list_add_tail(&ordered->work_list, &works);
     621        1053 :                 btrfs_queue_work(root->fs_info->flush_workers,
     622             :                                  &ordered->flush_work);
     623             : 
     624        1053 :                 cond_resched();
     625             :                 spin_lock(&root->ordered_extent_lock);
     626        1053 :                 if (nr != -1)
     627          14 :                         nr--;
     628        1053 :                 count++;
     629             :         }
     630             :         list_splice_tail(&splice, &root->ordered_extents);
     631             :         spin_unlock(&root->ordered_extent_lock);
     632             : 
     633        1246 :         list_for_each_entry_safe(ordered, next, &works, work_list) {
     634             :                 list_del_init(&ordered->work_list);
     635        1053 :                 wait_for_completion(&ordered->completion);
     636        1053 :                 btrfs_put_ordered_extent(ordered);
     637        1053 :                 cond_resched();
     638             :         }
     639         193 :         mutex_unlock(&root->ordered_extent_mutex);
     640             : 
     641         193 :         return count;
     642             : }
     643             : 
     644        2492 : void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr)
     645             : {
     646             :         struct btrfs_root *root;
     647             :         struct list_head splice;
     648             :         int done;
     649             : 
     650             :         INIT_LIST_HEAD(&splice);
     651             : 
     652        2492 :         mutex_lock(&fs_info->ordered_operations_mutex);
     653             :         spin_lock(&fs_info->ordered_root_lock);
     654        2493 :         list_splice_init(&fs_info->ordered_roots, &splice);
     655        2540 :         while (!list_empty(&splice) && nr) {
     656          47 :                 root = list_first_entry(&splice, struct btrfs_root,
     657             :                                         ordered_root);
     658          47 :                 root = btrfs_grab_fs_root(root);
     659          47 :                 BUG_ON(!root);
     660          47 :                 list_move_tail(&root->ordered_root,
     661             :                                &fs_info->ordered_roots);
     662             :                 spin_unlock(&fs_info->ordered_root_lock);
     663             : 
     664          47 :                 done = btrfs_wait_ordered_extents(root, nr);
     665          47 :                 btrfs_put_fs_root(root);
     666             : 
     667             :                 spin_lock(&fs_info->ordered_root_lock);
     668          47 :                 if (nr != -1) {
     669           3 :                         nr -= done;
     670           3 :                         WARN_ON(nr < 0);
     671             :                 }
     672             :         }
     673             :         list_splice_tail(&splice, &fs_info->ordered_roots);
     674             :         spin_unlock(&fs_info->ordered_root_lock);
     675        2493 :         mutex_unlock(&fs_info->ordered_operations_mutex);
     676        2493 : }
     677             : 
     678             : /*
     679             :  * Used to start IO or wait for a given ordered extent to finish.
     680             :  *
     681             :  * If wait is one, this effectively waits on page writeback for all the pages
     682             :  * in the extent, and it waits on the io completion code to insert
     683             :  * metadata into the btree corresponding to the extent
     684             :  */
     685        3875 : void btrfs_start_ordered_extent(struct inode *inode,
     686             :                                        struct btrfs_ordered_extent *entry,
     687             :                                        int wait)
     688             : {
     689        3875 :         u64 start = entry->file_offset;
     690        3875 :         u64 end = start + entry->len - 1;
     691             : 
     692        3875 :         trace_btrfs_ordered_extent_start(inode, entry);
     693             : 
     694             :         /*
     695             :          * pages in the range can be dirty, clean or writeback.  We
     696             :          * start IO on any dirty ones so the wait doesn't stall waiting
     697             :          * for the flusher thread to find them
     698             :          */
     699        3875 :         if (!test_bit(BTRFS_ORDERED_DIRECT, &entry->flags))
     700        3875 :                 filemap_fdatawrite_range(inode->i_mapping, start, end);
     701        3875 :         if (wait) {
     702       14832 :                 wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE,
     703             :                                                  &entry->flags));
     704             :         }
     705        3874 : }
     706             : 
     707             : /*
     708             :  * Used to wait on ordered extents across a large range of bytes.
     709             :  */
     710       17638 : int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
     711             : {
     712             :         int ret = 0;
     713             :         u64 end;
     714             :         u64 orig_end;
     715             :         struct btrfs_ordered_extent *ordered;
     716             : 
     717       17638 :         if (start + len < start) {
     718             :                 orig_end = INT_LIMIT(loff_t);
     719             :         } else {
     720       15421 :                 orig_end = start + len - 1;
     721       15421 :                 if (orig_end > INT_LIMIT(loff_t))
     722             :                         orig_end = INT_LIMIT(loff_t);
     723             :         }
     724             : 
     725             :         /* start IO across the range first to instantiate any delalloc
     726             :          * extents
     727             :          */
     728       17638 :         ret = filemap_fdatawrite_range(inode->i_mapping, start, orig_end);
     729       17637 :         if (ret)
     730             :                 return ret;
     731             :         /*
     732             :          * So with compression we will find and lock a dirty page and clear the
     733             :          * first one as dirty, setup an async extent, and immediately return
     734             :          * with the entire range locked but with nobody actually marked with
     735             :          * writeback.  So we can't just filemap_write_and_wait_range() and
     736             :          * expect it to work since it will just kick off a thread to do the
     737             :          * actual work.  So we need to call filemap_fdatawrite_range _again_
     738             :          * since it will wait on the page lock, which won't be unlocked until
     739             :          * after the pages have been marked as writeback and so we're good to go
     740             :          * from there.  We have to do this otherwise we'll miss the ordered
     741             :          * extents and that results in badness.  Please Josef, do not think you
     742             :          * know better and pull this out at some point in the future, it is
     743             :          * right and you are wrong.
     744             :          */
     745       17637 :         if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
     746             :                      &BTRFS_I(inode)->runtime_flags)) {
     747          90 :                 ret = filemap_fdatawrite_range(inode->i_mapping, start,
     748             :                                                orig_end);
     749          90 :                 if (ret)
     750             :                         return ret;
     751             :         }
     752       17637 :         ret = filemap_fdatawait_range(inode->i_mapping, start, orig_end);
     753       17638 :         if (ret)
     754             :                 return ret;
     755             : 
     756             :         end = orig_end;
     757             :         while (1) {
     758       17855 :                 ordered = btrfs_lookup_first_ordered_extent(inode, end);
     759       17855 :                 if (!ordered)
     760             :                         break;
     761        2809 :                 if (ordered->file_offset > orig_end) {
     762           0 :                         btrfs_put_ordered_extent(ordered);
     763           0 :                         break;
     764             :                 }
     765        2809 :                 if (ordered->file_offset + ordered->len <= start) {
     766           4 :                         btrfs_put_ordered_extent(ordered);
     767           4 :                         break;
     768             :                 }
     769        2805 :                 btrfs_start_ordered_extent(inode, ordered, 1);
     770        2805 :                 end = ordered->file_offset;
     771        2805 :                 if (test_bit(BTRFS_ORDERED_IOERR, &ordered->flags))
     772             :                         ret = -EIO;
     773        2805 :                 btrfs_put_ordered_extent(ordered);
     774        2805 :                 if (ret || end == 0 || end == start)
     775             :                         break;
     776         217 :                 end--;
     777         217 :         }
     778       17638 :         return ret;
     779             : }
     780             : 
     781             : /*
     782             :  * find an ordered extent corresponding to file_offset.  return NULL if
     783             :  * nothing is found, otherwise take a reference on the extent and return it
     784             :  */
     785     1919242 : struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
     786             :                                                          u64 file_offset)
     787             : {
     788             :         struct btrfs_ordered_inode_tree *tree;
     789             :         struct rb_node *node;
     790       90972 :         struct btrfs_ordered_extent *entry = NULL;
     791             : 
     792     1919242 :         tree = &BTRFS_I(inode)->ordered_tree;
     793             :         spin_lock_irq(&tree->lock);
     794     1919295 :         node = tree_search(tree, file_offset);
     795     1919292 :         if (!node)
     796             :                 goto out;
     797             : 
     798       90972 :         entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
     799       90972 :         if (!offset_in_entry(entry, file_offset))
     800             :                 entry = NULL;
     801       90972 :         if (entry)
     802       90889 :                 atomic_inc(&entry->refs);
     803             : out:
     804             :         spin_unlock_irq(&tree->lock);
     805     1919296 :         return entry;
     806             : }
     807             : 
     808             : /* Since the DIO code tries to lock a wide area we need to look for any ordered
     809             :  * extents that exist in the range, rather than just the start of the range.
     810             :  */
     811      118648 : struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
     812             :                                                         u64 file_offset,
     813             :                                                         u64 len)
     814             : {
     815             :         struct btrfs_ordered_inode_tree *tree;
     816             :         struct rb_node *node;
     817        2215 :         struct btrfs_ordered_extent *entry = NULL;
     818             : 
     819      118648 :         tree = &BTRFS_I(inode)->ordered_tree;
     820             :         spin_lock_irq(&tree->lock);
     821      118653 :         node = tree_search(tree, file_offset);
     822      118652 :         if (!node) {
     823      116440 :                 node = tree_search(tree, file_offset + len);
     824      116440 :                 if (!node)
     825             :                         goto out;
     826             :         }
     827             : 
     828             :         while (1) {
     829        2215 :                 entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
     830        2215 :                 if (range_overlaps(entry, file_offset, len))
     831             :                         break;
     832             : 
     833        2207 :                 if (entry->file_offset >= file_offset + len) {
     834             :                         entry = NULL;
     835             :                         break;
     836             :                 }
     837             :                 entry = NULL;
     838        2194 :                 node = rb_next(node);
     839        2195 :                 if (!node)
     840             :                         break;
     841             :         }
     842             : out:
     843      118653 :         if (entry)
     844           8 :                 atomic_inc(&entry->refs);
     845             :         spin_unlock_irq(&tree->lock);
     846      118651 :         return entry;
     847             : }
     848             : 
     849             : /*
     850             :  * lookup and return any extent before 'file_offset'.  NULL is returned
     851             :  * if none is found
     852             :  */
     853             : struct btrfs_ordered_extent *
     854       46076 : btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset)
     855             : {
     856             :         struct btrfs_ordered_inode_tree *tree;
     857             :         struct rb_node *node;
     858             :         struct btrfs_ordered_extent *entry = NULL;
     859             : 
     860       46076 :         tree = &BTRFS_I(inode)->ordered_tree;
     861             :         spin_lock_irq(&tree->lock);
     862       46076 :         node = tree_search(tree, file_offset);
     863       46076 :         if (!node)
     864             :                 goto out;
     865             : 
     866        2812 :         entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
     867        2812 :         atomic_inc(&entry->refs);
     868             : out:
     869             :         spin_unlock_irq(&tree->lock);
     870       46076 :         return entry;
     871             : }
     872             : 
     873             : /*
     874             :  * After an extent is done, call this to conditionally update the on disk
     875             :  * i_size.  i_size is updated to cover any fully written part of the file.
     876             :  */
     877      131334 : int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
     878       51505 :                                 struct btrfs_ordered_extent *ordered)
     879             : {
     880       65667 :         struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
     881             :         u64 disk_i_size;
     882             :         u64 new_i_size;
     883       65667 :         u64 i_size = i_size_read(inode);
     884             :         struct rb_node *node;
     885             :         struct rb_node *prev = NULL;
     886         253 :         struct btrfs_ordered_extent *test;
     887             :         int ret = 1;
     888             : 
     889             :         spin_lock_irq(&tree->lock);
     890       65667 :         if (ordered) {
     891             :                 offset = entry_end(ordered);
     892       51505 :                 if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags))
     893           0 :                         offset = min(offset,
     894             :                                      ordered->file_offset +
     895             :                                      ordered->truncated_len);
     896             :         } else {
     897       14162 :                 offset = ALIGN(offset, BTRFS_I(inode)->root->sectorsize);
     898             :         }
     899       65667 :         disk_i_size = BTRFS_I(inode)->disk_i_size;
     900             : 
     901             :         /* truncate file */
     902       65667 :         if (disk_i_size > i_size) {
     903         560 :                 BTRFS_I(inode)->disk_i_size = i_size;
     904             :                 ret = 0;
     905         560 :                 goto out;
     906             :         }
     907             : 
     908             :         /*
     909             :          * if the disk i_size is already at the inode->i_size, or
     910             :          * this ordered extent is inside the disk i_size, we're done
     911             :          */
     912       65107 :         if (disk_i_size == i_size)
     913             :                 goto out;
     914             : 
     915             :         /*
     916             :          * We still need to update disk_i_size if outstanding_isize is greater
     917             :          * than disk_i_size.
     918             :          */
     919       20819 :         if (offset <= disk_i_size &&
     920          79 :             (!ordered || ordered->outstanding_isize <= disk_i_size))
     921             :                 goto out;
     922             : 
     923             :         /*
     924             :          * walk backward from this ordered extent to disk_i_size.
     925             :          * if we find an ordered extent then we can't update disk i_size
     926             :          * yet
     927             :          */
     928       20740 :         if (ordered) {
     929       14354 :                 node = rb_prev(&ordered->rb_node);
     930             :         } else {
     931        6386 :                 prev = tree_search(tree, offset);
     932             :                 /*
     933             :                  * we insert file extents without involving ordered struct,
     934             :                  * so there should be no ordered struct cover this offset
     935             :                  */
     936        6386 :                 if (prev) {
     937             :                         test = rb_entry(prev, struct btrfs_ordered_extent,
     938             :                                         rb_node);
     939           6 :                         BUG_ON(offset_in_entry(test, offset));
     940             :                 }
     941             :                 node = prev;
     942             :         }
     943          24 :         for (; node; node = rb_prev(node)) {
     944             :                 test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
     945             : 
     946             :                 /* We treat this entry as if it doesnt exist */
     947         305 :                 if (test_bit(BTRFS_ORDERED_UPDATED_ISIZE, &test->flags))
     948          24 :                         continue;
     949         281 :                 if (test->file_offset + test->len <= disk_i_size)
     950             :                         break;
     951         247 :                 if (test->file_offset >= i_size)
     952             :                         break;
     953         247 :                 if (entry_end(test) > disk_i_size) {
     954             :                         /*
     955             :                          * we don't update disk_i_size now, so record this
     956             :                          * undealt i_size. Or we will not know the real
     957             :                          * i_size.
     958             :                          */
     959         247 :                         if (test->outstanding_isize < offset)
     960         247 :                                 test->outstanding_isize = offset;
     961         488 :                         if (ordered &&
     962         241 :                             ordered->outstanding_isize >
     963         241 :                             test->outstanding_isize)
     964          15 :                                 test->outstanding_isize =
     965             :                                                 ordered->outstanding_isize;
     966             :                         goto out;
     967             :                 }
     968             :         }
     969       20493 :         new_i_size = min_t(u64, offset, i_size);
     970             : 
     971             :         /*
     972             :          * Some ordered extents may completed before the current one, and
     973             :          * we hold the real i_size in ->outstanding_isize.
     974             :          */
     975       20493 :         if (ordered && ordered->outstanding_isize > new_i_size)
     976         180 :                 new_i_size = min_t(u64, ordered->outstanding_isize, i_size);
     977       20493 :         BTRFS_I(inode)->disk_i_size = new_i_size;
     978             :         ret = 0;
     979             : out:
     980             :         /*
     981             :          * We need to do this because we can't remove ordered extents until
     982             :          * after the i_disk_size has been updated and then the inode has been
     983             :          * updated to reflect the change, so we need to tell anybody who finds
     984             :          * this ordered extent that we've already done all the real work, we
     985             :          * just haven't completed all the other work.
     986             :          */
     987       65667 :         if (ordered)
     988             :                 set_bit(BTRFS_ORDERED_UPDATED_ISIZE, &ordered->flags);
     989             :         spin_unlock_irq(&tree->lock);
     990       65667 :         return ret;
     991             : }
     992             : 
     993             : /*
     994             :  * search the ordered extents for one corresponding to 'offset' and
     995             :  * try to find a checksum.  This is used because we allow pages to
     996             :  * be reclaimed before their checksum is actually put into the btree
     997             :  */
     998       26589 : int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
     999             :                            u32 *sum, int len)
    1000             : {
    1001             :         struct btrfs_ordered_sum *ordered_sum;
    1002             :         struct btrfs_ordered_extent *ordered;
    1003             :         struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
    1004             :         unsigned long num_sectors;
    1005             :         unsigned long i;
    1006       26589 :         u32 sectorsize = BTRFS_I(inode)->root->sectorsize;
    1007             :         int index = 0;
    1008             : 
    1009       26589 :         ordered = btrfs_lookup_ordered_extent(inode, offset);
    1010       26589 :         if (!ordered)
    1011             :                 return 0;
    1012             : 
    1013             :         spin_lock_irq(&tree->lock);
    1014           0 :         list_for_each_entry_reverse(ordered_sum, &ordered->list, list) {
    1015           0 :                 if (disk_bytenr >= ordered_sum->bytenr &&
    1016           0 :                     disk_bytenr < ordered_sum->bytenr + ordered_sum->len) {
    1017           0 :                         i = (disk_bytenr - ordered_sum->bytenr) >>
    1018           0 :                             inode->i_sb->s_blocksize_bits;
    1019           0 :                         num_sectors = ordered_sum->len >>
    1020             :                                       inode->i_sb->s_blocksize_bits;
    1021           0 :                         num_sectors = min_t(int, len - index, num_sectors - i);
    1022           0 :                         memcpy(sum + index, ordered_sum->sums + i,
    1023             :                                num_sectors);
    1024             : 
    1025           0 :                         index += (int)num_sectors;
    1026           0 :                         if (index == len)
    1027             :                                 goto out;
    1028           0 :                         disk_bytenr += num_sectors * sectorsize;
    1029             :                 }
    1030             :         }
    1031             : out:
    1032             :         spin_unlock_irq(&tree->lock);
    1033           0 :         btrfs_put_ordered_extent(ordered);
    1034           0 :         return index;
    1035             : }
    1036             : 
    1037           0 : int __init ordered_data_init(void)
    1038             : {
    1039           0 :         btrfs_ordered_extent_cache = kmem_cache_create("btrfs_ordered_extent",
    1040             :                                      sizeof(struct btrfs_ordered_extent), 0,
    1041             :                                      SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
    1042             :                                      NULL);
    1043           0 :         if (!btrfs_ordered_extent_cache)
    1044             :                 return -ENOMEM;
    1045             : 
    1046           0 :         return 0;
    1047             : }
    1048             : 
    1049           0 : void ordered_data_exit(void)
    1050             : {
    1051           0 :         if (btrfs_ordered_extent_cache)
    1052           0 :                 kmem_cache_destroy(btrfs_ordered_extent_cache);
    1053           0 : }

Generated by: LCOV version 1.10