LCOV - code coverage report
Current view: top level - fs/btrfs - compression.c (source / functions)
Test: btrfstest.info
Date: 2014-11-28

                 Hit    Total    Coverage
Lines:           245      377      65.0 %
Functions:        13       17      76.5 %

          Line data    Source code
       1             : /*
       2             :  * Copyright (C) 2008 Oracle.  All rights reserved.
       3             :  *
       4             :  * This program is free software; you can redistribute it and/or
       5             :  * modify it under the terms of the GNU General Public
       6             :  * License v2 as published by the Free Software Foundation.
       7             :  *
       8             :  * This program is distributed in the hope that it will be useful,
       9             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      10             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      11             :  * General Public License for more details.
      12             :  *
      13             :  * You should have received a copy of the GNU General Public
      14             :  * License along with this program; if not, write to the
      15             :  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
      16             :  * Boston, MA 021110-1307, USA.
      17             :  */
      18             : 
      19             : #include <linux/kernel.h>
      20             : #include <linux/bio.h>
      21             : #include <linux/buffer_head.h>
      22             : #include <linux/file.h>
      23             : #include <linux/fs.h>
      24             : #include <linux/pagemap.h>
      25             : #include <linux/highmem.h>
      26             : #include <linux/time.h>
      27             : #include <linux/init.h>
      28             : #include <linux/string.h>
      29             : #include <linux/backing-dev.h>
      30             : #include <linux/mpage.h>
      31             : #include <linux/swap.h>
      32             : #include <linux/writeback.h>
      33             : #include <linux/bit_spinlock.h>
      34             : #include <linux/slab.h>
      35             : #include "ctree.h"
      36             : #include "disk-io.h"
      37             : #include "transaction.h"
      38             : #include "btrfs_inode.h"
      39             : #include "volumes.h"
      40             : #include "ordered-data.h"
      41             : #include "compression.h"
      42             : #include "extent_io.h"
      43             : #include "extent_map.h"
      44             : 
      45             : struct compressed_bio {
      46             :         /* number of bios pending for this compressed extent */
      47             :         atomic_t pending_bios;
      48             : 
      49             :         /* the pages with the compressed data on them */
      50             :         struct page **compressed_pages;
      51             : 
      52             :         /* inode that owns this data */
      53             :         struct inode *inode;
      54             : 
      55             :         /* starting offset in the inode for our pages */
      56             :         u64 start;
      57             : 
      58             :         /* number of bytes in the inode we're working on */
      59             :         unsigned long len;
      60             : 
      61             :         /* number of bytes on disk */
      62             :         unsigned long compressed_len;
      63             : 
      64             :         /* the compression algorithm for this bio */
      65             :         int compress_type;
      66             : 
      67             :         /* number of compressed pages in the array */
      68             :         unsigned long nr_pages;
      69             : 
      70             :         /* IO errors */
      71             :         int errors;
      72             :         int mirror_num;
      73             : 
      74             :         /* for reads, this is the bio we are copying the data into */
      75             :         struct bio *orig_bio;
      76             : 
      77             :         /*
      78             :          * the start of a variable length array of checksums only
      79             :          * used by reads
      80             :          */
      81             :         u32 sums;
      82             : };
      83             : 
      84             : static int btrfs_decompress_biovec(int type, struct page **pages_in,
      85             :                                    u64 disk_start, struct bio_vec *bvec,
      86             :                                    int vcnt, size_t srclen);
      87             : 
      88             : static inline int compressed_bio_size(struct btrfs_root *root,
      89             :                                       unsigned long disk_size)
      90             : {
      91         195 :         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
      92             : 
      93         195 :         return sizeof(struct compressed_bio) +
      94         195 :                 ((disk_size + root->sectorsize - 1) / root->sectorsize) *
      95             :                 csum_size;
      96             : }
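
The u32 sums member of struct compressed_bio above is the first slot of a checksum array that continues past the end of the struct; compressed_bio_size() therefore folds one csum_size per on-disk sector into a single allocation, and check_compressed_csum() below walks that array one entry per compressed page. As a rough illustration only (not btrfs code), here is a minimal userspace sketch of the same trailing-array idiom, assuming 4 KiB sectors and 4-byte CRC32C checksums; every name in it is made up for the sketch:

    #include <stdint.h>
    #include <stdlib.h>

    struct cb_model {
            unsigned long compressed_len;
            uint32_t sums;          /* first checksum; the rest follow the struct */
    };

    /* like the kernel helper: sizeof(struct) plus one checksum per sector */
    static struct cb_model *cb_model_alloc(unsigned long disk_size,
                                           unsigned long sectorsize)
    {
            unsigned long sectors = (disk_size + sectorsize - 1) / sectorsize;

            return calloc(1, sizeof(struct cb_model) +
                             sectors * sizeof(uint32_t));
    }

    int main(void)
    {
            /* a 12 KiB compressed extent needs three checksum slots */
            struct cb_model *cb = cb_model_alloc(12 * 1024, 4096);
            uint32_t *sum = &cb->sums;
            int i;

            for (i = 0; i < 3; i++)
                    sum[i] = 0;     /* one checksum per sector, filled elsewhere */
            free(cb);
            return 0;
    }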
      97             : 
      98         195 : static struct bio *compressed_bio_alloc(struct block_device *bdev,
      99             :                                         u64 first_byte, gfp_t gfp_flags)
     100             : {
     101             :         int nr_vecs;
     102             : 
     103         195 :         nr_vecs = bio_get_nr_vecs(bdev);
     104         195 :         return btrfs_bio_alloc(bdev, first_byte >> 9, nr_vecs, gfp_flags);
     105             : }
     106             : 
     107          42 : static int check_compressed_csum(struct inode *inode,
     108             :                                  struct compressed_bio *cb,
     109             :                                  u64 disk_start)
     110             : {
     111             :         int ret;
     112             :         struct page *page;
     113             :         unsigned long i;
     114             :         char *kaddr;
     115             :         u32 csum;
     116          42 :         u32 *cb_sum = &cb->sums;
     117             : 
     118          42 :         if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
     119             :                 return 0;
     120             : 
     121          42 :         for (i = 0; i < cb->nr_pages; i++) {
     122          42 :                 page = cb->compressed_pages[i];
     123          42 :                 csum = ~(u32)0;
     124             : 
     125             :                 kaddr = kmap_atomic(page);
     126          42 :                 csum = btrfs_csum_data(kaddr, csum, PAGE_CACHE_SIZE);
     127          42 :                 btrfs_csum_final(csum, (char *)&csum);
     128             :                 kunmap_atomic(kaddr);
     129             : 
     130          42 :                 if (csum != *cb_sum) {
     131           0 :                         btrfs_info(BTRFS_I(inode)->root->fs_info,
     132             :                            "csum failed ino %llu extent %llu csum %u wanted %u mirror %d",
     133             :                            btrfs_ino(inode), disk_start, csum, *cb_sum,
     134             :                            cb->mirror_num);
     135             :                         ret = -EIO;
     136           0 :                         goto fail;
     137             :                 }
     138          42 :                 cb_sum++;
     139             : 
     140             :         }
     141             :         ret = 0;
     142             : fail:
     143          42 :         return ret;
     144             : }
     145             : 
     146             : /* when we finish reading compressed pages from the disk, we
     147             :  * decompress them and then run the bio end_io routines on the
     148             :  * decompressed pages (in the inode address space).
     149             :  *
     150             :  * This allows the checksumming and other IO error handling routines
     151             :  * to work normally
     152             :  *
     153             :  * The compressed pages are freed here, and it must be run
     154             :  * in process context
     155             :  */
     156          42 : static void end_compressed_bio_read(struct bio *bio, int err)
     157             : {
     158          42 :         struct compressed_bio *cb = bio->bi_private;
     159             :         struct inode *inode;
     160             :         struct page *page;
     161             :         unsigned long index;
     162             :         int ret;
     163             : 
     164          42 :         if (err)
     165           0 :                 cb->errors = 1;
     166             : 
     167             :         /* if there are more bios still pending for this compressed
     168             :          * extent, just exit
     169             :          */
     170          84 :         if (!atomic_dec_and_test(&cb->pending_bios))
     171             :                 goto out;
     172             : 
     173          42 :         inode = cb->inode;
     174          42 :         ret = check_compressed_csum(inode, cb,
     175          42 :                                     (u64)bio->bi_iter.bi_sector << 9);
     176          42 :         if (ret)
     177             :                 goto csum_failed;
     178             : 
      179             :         /* ok, we're the last bio for this extent, let's start
     180             :          * the decompression.
     181             :          */
     182          84 :         ret = btrfs_decompress_biovec(cb->compress_type,
     183             :                                       cb->compressed_pages,
     184             :                                       cb->start,
     185             :                                       cb->orig_bio->bi_io_vec,
     186          42 :                                       cb->orig_bio->bi_vcnt,
     187             :                                       cb->compressed_len);
     188             : csum_failed:
     189          42 :         if (ret)
     190           0 :                 cb->errors = 1;
     191             : 
     192             :         /* release the compressed pages */
     193             :         index = 0;
     194          42 :         for (index = 0; index < cb->nr_pages; index++) {
     195          42 :                 page = cb->compressed_pages[index];
     196          42 :                 page->mapping = NULL;
     197          42 :                 page_cache_release(page);
     198             :         }
     199             : 
     200             :         /* do io completion on the original bio */
     201          42 :         if (cb->errors) {
     202           0 :                 bio_io_error(cb->orig_bio);
     203             :         } else {
     204             :                 int i;
     205             :                 struct bio_vec *bvec;
     206             : 
     207             :                 /*
     208             :                  * we have verified the checksum already, set page
     209             :                  * checked so the end_io handlers know about it
     210             :                  */
     211         374 :                 bio_for_each_segment_all(bvec, cb->orig_bio, i)
     212         332 :                         SetPageChecked(bvec->bv_page);
     213             : 
     214          42 :                 bio_endio(cb->orig_bio, 0);
     215             :         }
     216             : 
     217             :         /* finally free the cb struct */
     218          42 :         kfree(cb->compressed_pages);
     219          42 :         kfree(cb);
     220             : out:
     221          42 :         bio_put(bio);
     222          42 : }
     223             : 
     224             : /*
     225             :  * Clear the writeback bits on all of the file
     226             :  * pages for a compressed write
     227             :  */
     228         153 : static noinline void end_compressed_writeback(struct inode *inode, u64 start,
     229             :                                               unsigned long ram_size)
     230             : {
     231         153 :         unsigned long index = start >> PAGE_CACHE_SHIFT;
     232         153 :         unsigned long end_index = (start + ram_size - 1) >> PAGE_CACHE_SHIFT;
     233             :         struct page *pages[16];
     234         153 :         unsigned long nr_pages = end_index - index + 1;
     235             :         int i;
     236             :         int ret;
     237             : 
     238         392 :         while (nr_pages > 0) {
     239         239 :                 ret = find_get_pages_contig(inode->i_mapping, index,
     240         239 :                                      min_t(unsigned long,
     241             :                                      nr_pages, ARRAY_SIZE(pages)), pages);
     242         238 :                 if (ret == 0) {
     243           0 :                         nr_pages -= 1;
     244           0 :                         index += 1;
     245           0 :                         continue;
     246             :                 }
     247        2942 :                 for (i = 0; i < ret; i++) {
     248        2941 :                         end_page_writeback(pages[i]);
     249        2940 :                         page_cache_release(pages[i]);
     250             :                 }
     251         239 :                 nr_pages -= ret;
     252         239 :                 index += ret;
     253             :         }
     254             :         /* the inode may be gone now */
     255         153 : }
     256             : 
     257             : /*
     258             :  * do the cleanup once all the compressed pages hit the disk.
     259             :  * This will clear writeback on the file pages and free the compressed
     260             :  * pages.
     261             :  *
     262             :  * This also calls the writeback end hooks for the file pages so that
     263             :  * metadata and checksums can be updated in the file.
     264             :  */
     265         153 : static void end_compressed_bio_write(struct bio *bio, int err)
     266             : {
     267             :         struct extent_io_tree *tree;
     268         153 :         struct compressed_bio *cb = bio->bi_private;
     269             :         struct inode *inode;
     270             :         struct page *page;
     271             :         unsigned long index;
     272             : 
     273         153 :         if (err)
     274           0 :                 cb->errors = 1;
     275             : 
     276             :         /* if there are more bios still pending for this compressed
     277             :          * extent, just exit
     278             :          */
     279         306 :         if (!atomic_dec_and_test(&cb->pending_bios))
     280             :                 goto out;
     281             : 
     282             :         /* ok, we're the last bio for this extent, step one is to
     283             :          * call back into the FS and do all the end_io operations
     284             :          */
     285         153 :         inode = cb->inode;
     286             :         tree = &BTRFS_I(inode)->io_tree;
     287         153 :         cb->compressed_pages[0]->mapping = cb->inode->i_mapping;
     288         306 :         tree->ops->writepage_end_io_hook(cb->compressed_pages[0],
     289             :                                          cb->start,
     290         153 :                                          cb->start + cb->len - 1,
     291             :                                          NULL, 1);
     292         153 :         cb->compressed_pages[0]->mapping = NULL;
     293             : 
     294         153 :         end_compressed_writeback(inode, cb->start, cb->len);
     295             :         /* note, our inode could be gone now */
     296             : 
     297             :         /*
     298             :          * release the compressed pages, these came from alloc_page and
     299             :          * are not attached to the inode at all
     300             :          */
     301             :         index = 0;
     302         306 :         for (index = 0; index < cb->nr_pages; index++) {
     303         153 :                 page = cb->compressed_pages[index];
     304         153 :                 page->mapping = NULL;
     305         153 :                 page_cache_release(page);
     306             :         }
     307             : 
     308             :         /* finally free the cb struct */
     309         153 :         kfree(cb->compressed_pages);
     310         152 :         kfree(cb);
     311             : out:
     312         152 :         bio_put(bio);
     313         153 : }
     314             : 
     315             : /*
     316             :  * worker function to build and submit bios for previously compressed pages.
     317             :  * The corresponding pages in the inode should be marked for writeback
     318             :  * and the compressed pages should have a reference on them for dropping
     319             :  * when the IO is complete.
     320             :  *
     321             :  * This also checksums the file bytes and gets things ready for
     322             :  * the end io hooks.
     323             :  */
     324         153 : int btrfs_submit_compressed_write(struct inode *inode, u64 start,
     325             :                                  unsigned long len, u64 disk_start,
     326             :                                  unsigned long compressed_len,
     327             :                                  struct page **compressed_pages,
     328             :                                  unsigned long nr_pages)
     329             : {
     330             :         struct bio *bio = NULL;
     331         306 :         struct btrfs_root *root = BTRFS_I(inode)->root;
     332             :         struct compressed_bio *cb;
     333             :         unsigned long bytes_left;
     334             :         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
     335             :         int pg_index = 0;
     336             :         struct page *page;
     337             :         u64 first_byte = disk_start;
     338             :         struct block_device *bdev;
     339             :         int ret;
     340         153 :         int skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
     341             : 
     342         153 :         WARN_ON(start & ((u64)PAGE_CACHE_SIZE - 1));
     343         153 :         cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
     344         153 :         if (!cb)
     345             :                 return -ENOMEM;
     346             :         atomic_set(&cb->pending_bios, 0);
     347         153 :         cb->errors = 0;
     348         153 :         cb->inode = inode;
     349         153 :         cb->start = start;
     350         153 :         cb->len = len;
     351         153 :         cb->mirror_num = 0;
     352         153 :         cb->compressed_pages = compressed_pages;
     353         153 :         cb->compressed_len = compressed_len;
     354         153 :         cb->orig_bio = NULL;
     355         153 :         cb->nr_pages = nr_pages;
     356             : 
     357         153 :         bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
     358             : 
     359         153 :         bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
     360         153 :         if (!bio) {
     361           0 :                 kfree(cb);
     362           0 :                 return -ENOMEM;
     363             :         }
     364         153 :         bio->bi_private = cb;
     365         153 :         bio->bi_end_io = end_compressed_bio_write;
     366         153 :         atomic_inc(&cb->pending_bios);
     367             : 
     368             :         /* create and submit bios for the compressed pages */
     369             :         bytes_left = compressed_len;
     370         306 :         for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) {
     371         153 :                 page = compressed_pages[pg_index];
     372         153 :                 page->mapping = inode->i_mapping;
     373         153 :                 if (bio->bi_iter.bi_size)
     374           0 :                         ret = io_tree->ops->merge_bio_hook(WRITE, page, 0,
     375             :                                                            PAGE_CACHE_SIZE,
     376             :                                                            bio, 0);
     377             :                 else
     378             :                         ret = 0;
     379             : 
     380         153 :                 page->mapping = NULL;
     381         153 :                 if (ret || bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) <
     382             :                     PAGE_CACHE_SIZE) {
     383           0 :                         bio_get(bio);
     384             : 
     385             :                         /*
     386             :                          * inc the count before we submit the bio so
     387             :                          * we know the end IO handler won't happen before
     388             :                          * we inc the count.  Otherwise, the cb might get
     389             :                          * freed before we're done setting it up
     390             :                          */
     391             :                         atomic_inc(&cb->pending_bios);
     392           0 :                         ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
     393           0 :                         BUG_ON(ret); /* -ENOMEM */
     394             : 
     395           0 :                         if (!skip_sum) {
     396           0 :                                 ret = btrfs_csum_one_bio(root, inode, bio,
     397             :                                                          start, 1);
     398           0 :                                 BUG_ON(ret); /* -ENOMEM */
     399             :                         }
     400             : 
     401           0 :                         ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
     402           0 :                         BUG_ON(ret); /* -ENOMEM */
     403             : 
     404           0 :                         bio_put(bio);
     405             : 
     406           0 :                         bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
     407           0 :                         BUG_ON(!bio);
     408           0 :                         bio->bi_private = cb;
     409           0 :                         bio->bi_end_io = end_compressed_bio_write;
     410           0 :                         bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
     411             :                 }
     412         153 :                 if (bytes_left < PAGE_CACHE_SIZE) {
     413           0 :                         btrfs_info(BTRFS_I(inode)->root->fs_info,
     414             :                                         "bytes left %lu compress len %lu nr %lu",
     415             :                                bytes_left, cb->compressed_len, cb->nr_pages);
     416             :                 }
     417         153 :                 bytes_left -= PAGE_CACHE_SIZE;
     418         153 :                 first_byte += PAGE_CACHE_SIZE;
     419         153 :                 cond_resched();
     420             :         }
     421         153 :         bio_get(bio);
     422             : 
     423         153 :         ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
     424         153 :         BUG_ON(ret); /* -ENOMEM */
     425             : 
     426         153 :         if (!skip_sum) {
     427         153 :                 ret = btrfs_csum_one_bio(root, inode, bio, start, 1);
     428         153 :                 BUG_ON(ret); /* -ENOMEM */
     429             :         }
     430             : 
     431         153 :         ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
     432         153 :         BUG_ON(ret); /* -ENOMEM */
     433             : 
     434         153 :         bio_put(bio);
     435         153 :         return 0;
     436             : }
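
The pending_bios accounting in the function above (see the comment around source lines 385-390) is the subtle part: the count is bumped for the next bio before the current one is submitted, so a completion that fires immediately cannot drop the count to zero and free the cb while the submit loop still needs it. Below is a minimal userspace sketch of that reference-counting pattern, using C11 atomics in place of the kernel's atomic_t; the types and names are illustrative only:

    #include <stdatomic.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct cb_model {
            atomic_int pending;             /* bios still outstanding */
    };

    /* stands in for end_compressed_bio_write(): only the completion that
     * takes the count to zero may free the shared control structure */
    static void endio(struct cb_model *cb)
    {
            if (atomic_fetch_sub(&cb->pending, 1) != 1)
                    return;                 /* other bios still pending */
            printf("last completion, freeing cb\n");
            free(cb);
    }

    int main(void)
    {
            struct cb_model *cb = calloc(1, sizeof(*cb));
            int extra_bios = 2, i;

            atomic_store(&cb->pending, 1);  /* covers the bio being built */
            for (i = 0; i < extra_bios; i++) {
                    /* account for the next bio BEFORE completing the current
                     * one, so cb cannot be freed in between */
                    atomic_fetch_add(&cb->pending, 1);
                    endio(cb);              /* the just-submitted bio completes */
            }
            endio(cb);                      /* final bio completes and frees cb */
            return 0;
    }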
     437             : 
     438          42 : static noinline int add_ra_bio_pages(struct inode *inode,
     439             :                                      u64 compressed_end,
     440             :                                      struct compressed_bio *cb)
     441             : {
     442             :         unsigned long end_index;
     443             :         unsigned long pg_index;
     444             :         u64 last_offset;
     445          42 :         u64 isize = i_size_read(inode);
     446             :         int ret;
     447          42 :         struct page *page;
     448             :         unsigned long nr_pages = 0;
     449           0 :         struct extent_map *em;
     450          42 :         struct address_space *mapping = inode->i_mapping;
     451             :         struct extent_map_tree *em_tree;
     452             :         struct extent_io_tree *tree;
     453             :         u64 end;
     454             :         int misses = 0;
     455             : 
     456          42 :         page = cb->orig_bio->bi_io_vec[cb->orig_bio->bi_vcnt - 1].bv_page;
     457          42 :         last_offset = (page_offset(page) + PAGE_CACHE_SIZE);
     458          42 :         em_tree = &BTRFS_I(inode)->extent_tree;
     459          42 :         tree = &BTRFS_I(inode)->io_tree;
     460             : 
     461          42 :         if (isize == 0)
     462             :                 return 0;
     463             : 
     464          42 :         end_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
     465             : 
     466          42 :         while (last_offset < compressed_end) {
     467           0 :                 pg_index = last_offset >> PAGE_CACHE_SHIFT;
     468             : 
     469           0 :                 if (pg_index > end_index)
     470             :                         break;
     471             : 
     472             :                 rcu_read_lock();
     473           0 :                 page = radix_tree_lookup(&mapping->page_tree, pg_index);
     474             :                 rcu_read_unlock();
     475           0 :                 if (page && !radix_tree_exceptional_entry(page)) {
     476           0 :                         misses++;
     477           0 :                         if (misses > 4)
     478             :                                 break;
     479             :                         goto next;
     480             :                 }
     481             : 
     482           0 :                 page = __page_cache_alloc(mapping_gfp_mask(mapping) &
     483             :                                                                 ~__GFP_FS);
     484           0 :                 if (!page)
     485             :                         break;
     486             : 
     487           0 :                 if (add_to_page_cache_lru(page, mapping, pg_index,
     488             :                                                                 GFP_NOFS)) {
     489           0 :                         page_cache_release(page);
     490             :                         goto next;
     491             :                 }
     492             : 
     493           0 :                 end = last_offset + PAGE_CACHE_SIZE - 1;
     494             :                 /*
     495             :                  * at this point, we have a locked page in the page cache
     496             :                  * for these bytes in the file.  But, we have to make
     497             :                  * sure they map to this compressed extent on disk.
     498             :                  */
     499           0 :                 set_page_extent_mapped(page);
     500           0 :                 lock_extent(tree, last_offset, end);
     501           0 :                 read_lock(&em_tree->lock);
     502           0 :                 em = lookup_extent_mapping(em_tree, last_offset,
     503             :                                            PAGE_CACHE_SIZE);
     504             :                 read_unlock(&em_tree->lock);
     505             : 
     506           0 :                 if (!em || last_offset < em->start ||
     507           0 :                     (last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) ||
     508           0 :                     (em->block_start >> 9) != cb->orig_bio->bi_iter.bi_sector) {
     509           0 :                         free_extent_map(em);
     510           0 :                         unlock_extent(tree, last_offset, end);
     511           0 :                         unlock_page(page);
     512           0 :                         page_cache_release(page);
     513             :                         break;
     514             :                 }
     515           0 :                 free_extent_map(em);
     516             : 
     517           0 :                 if (page->index == end_index) {
     518             :                         char *userpage;
     519           0 :                         size_t zero_offset = isize & (PAGE_CACHE_SIZE - 1);
     520             : 
     521           0 :                         if (zero_offset) {
     522             :                                 int zeros;
     523           0 :                                 zeros = PAGE_CACHE_SIZE - zero_offset;
     524             :                                 userpage = kmap_atomic(page);
     525           0 :                                 memset(userpage + zero_offset, 0, zeros);
     526             :                                 flush_dcache_page(page);
     527             :                                 kunmap_atomic(userpage);
     528             :                         }
     529             :                 }
     530             : 
     531           0 :                 ret = bio_add_page(cb->orig_bio, page,
     532             :                                    PAGE_CACHE_SIZE, 0);
     533             : 
     534           0 :                 if (ret == PAGE_CACHE_SIZE) {
     535             :                         nr_pages++;
     536           0 :                         page_cache_release(page);
     537             :                 } else {
     538           0 :                         unlock_extent(tree, last_offset, end);
     539           0 :                         unlock_page(page);
     540           0 :                         page_cache_release(page);
     541             :                         break;
     542             :                 }
     543             : next:
     544           0 :                 last_offset += PAGE_CACHE_SIZE;
     545             :         }
     546             :         return 0;
     547             : }
     548             : 
     549             : /*
     550             :  * for a compressed read, the bio we get passed has all the inode pages
     551             :  * in it.  We don't actually do IO on those pages but allocate new ones
     552             :  * to hold the compressed pages on disk.
     553             :  *
     554             :  * bio->bi_iter.bi_sector points to the compressed extent on disk
     555             :  * bio->bi_io_vec points to all of the inode pages
     556             :  * bio->bi_vcnt is a count of pages
     557             :  *
     558             :  * After the compressed pages are read, we copy the bytes into the
     559             :  * bio we were passed and then call the bio end_io calls
     560             :  */
     561          42 : int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
     562             :                                  int mirror_num, unsigned long bio_flags)
     563             : {
     564             :         struct extent_io_tree *tree;
     565             :         struct extent_map_tree *em_tree;
     566             :         struct compressed_bio *cb;
     567          84 :         struct btrfs_root *root = BTRFS_I(inode)->root;
     568          42 :         unsigned long uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE;
     569             :         unsigned long compressed_len;
     570             :         unsigned long nr_pages;
     571             :         unsigned long pg_index;
     572             :         struct page *page;
     573             :         struct block_device *bdev;
     574             :         struct bio *comp_bio;
     575          42 :         u64 cur_disk_byte = (u64)bio->bi_iter.bi_sector << 9;
     576             :         u64 em_len;
     577             :         u64 em_start;
     578             :         struct extent_map *em;
     579             :         int ret = -ENOMEM;
     580             :         int faili = 0;
     581             :         u32 *sums;
     582             : 
     583             :         tree = &BTRFS_I(inode)->io_tree;
     584          42 :         em_tree = &BTRFS_I(inode)->extent_tree;
     585             : 
     586             :         /* we need the actual starting offset of this extent in the file */
     587          42 :         read_lock(&em_tree->lock);
     588          42 :         em = lookup_extent_mapping(em_tree,
     589          42 :                                    page_offset(bio->bi_io_vec->bv_page),
     590             :                                    PAGE_CACHE_SIZE);
     591             :         read_unlock(&em_tree->lock);
     592          42 :         if (!em)
     593             :                 return -EIO;
     594             : 
     595          42 :         compressed_len = em->block_len;
     596          42 :         cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
     597          42 :         if (!cb)
     598             :                 goto out;
     599             : 
     600             :         atomic_set(&cb->pending_bios, 0);
     601          42 :         cb->errors = 0;
     602          42 :         cb->inode = inode;
     603          42 :         cb->mirror_num = mirror_num;
     604          42 :         sums = &cb->sums;
     605             : 
     606          42 :         cb->start = em->orig_start;
     607          42 :         em_len = em->len;
     608          42 :         em_start = em->start;
     609             : 
     610          42 :         free_extent_map(em);
     611             :         em = NULL;
     612             : 
     613          42 :         cb->len = uncompressed_len;
     614          42 :         cb->compressed_len = compressed_len;
     615          42 :         cb->compress_type = extent_compress_type(bio_flags);
     616          42 :         cb->orig_bio = bio;
     617             : 
     618          42 :         nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) /
     619             :                                  PAGE_CACHE_SIZE;
     620          42 :         cb->compressed_pages = kzalloc(sizeof(struct page *) * nr_pages,
     621             :                                        GFP_NOFS);
     622          42 :         if (!cb->compressed_pages)
     623             :                 goto fail1;
     624             : 
     625          42 :         bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
     626             : 
     627          84 :         for (pg_index = 0; pg_index < nr_pages; pg_index++) {
     628          84 :                 cb->compressed_pages[pg_index] = alloc_page(GFP_NOFS |
     629             :                                                               __GFP_HIGHMEM);
     630          42 :                 if (!cb->compressed_pages[pg_index]) {
     631           0 :                         faili = pg_index - 1;
     632             :                         ret = -ENOMEM;
     633           0 :                         goto fail2;
     634             :                 }
     635             :         }
     636          42 :         faili = nr_pages - 1;
     637          42 :         cb->nr_pages = nr_pages;
     638             : 
     639             :         /* In the parent-locked case, we only locked the range we are
     640             :          * interested in.  In all other cases, we can opportunistically
     641             :          * cache decompressed data that goes beyond the requested range. */
     642          42 :         if (!(bio_flags & EXTENT_BIO_PARENT_LOCKED))
     643          42 :                 add_ra_bio_pages(inode, em_start + em_len, cb);
     644             : 
     645             :         /* include any pages we added in add_ra_bio_pages */
     646          42 :         uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE;
     647          42 :         cb->len = uncompressed_len;
     648             : 
     649          42 :         comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS);
     650          42 :         if (!comp_bio)
     651             :                 goto fail2;
     652          42 :         comp_bio->bi_private = cb;
     653          42 :         comp_bio->bi_end_io = end_compressed_bio_read;
     654          42 :         atomic_inc(&cb->pending_bios);
     655             : 
     656          84 :         for (pg_index = 0; pg_index < nr_pages; pg_index++) {
     657          42 :                 page = cb->compressed_pages[pg_index];
     658          42 :                 page->mapping = inode->i_mapping;
     659          42 :                 page->index = em_start >> PAGE_CACHE_SHIFT;
     660             : 
     661          42 :                 if (comp_bio->bi_iter.bi_size)
     662           0 :                         ret = tree->ops->merge_bio_hook(READ, page, 0,
     663             :                                                         PAGE_CACHE_SIZE,
     664             :                                                         comp_bio, 0);
     665             :                 else
     666             :                         ret = 0;
     667             : 
     668          42 :                 page->mapping = NULL;
     669          42 :                 if (ret || bio_add_page(comp_bio, page, PAGE_CACHE_SIZE, 0) <
     670             :                     PAGE_CACHE_SIZE) {
     671           0 :                         bio_get(comp_bio);
     672             : 
     673           0 :                         ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0);
     674           0 :                         BUG_ON(ret); /* -ENOMEM */
     675             : 
     676             :                         /*
     677             :                          * inc the count before we submit the bio so
     678             :                          * we know the end IO handler won't happen before
     679             :                          * we inc the count.  Otherwise, the cb might get
     680             :                          * freed before we're done setting it up
     681             :                          */
     682             :                         atomic_inc(&cb->pending_bios);
     683             : 
     684           0 :                         if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
     685           0 :                                 ret = btrfs_lookup_bio_sums(root, inode,
     686             :                                                         comp_bio, sums);
     687           0 :                                 BUG_ON(ret); /* -ENOMEM */
     688             :                         }
     689           0 :                         sums += (comp_bio->bi_iter.bi_size +
     690           0 :                                  root->sectorsize - 1) / root->sectorsize;
     691             : 
     692           0 :                         ret = btrfs_map_bio(root, READ, comp_bio,
     693             :                                             mirror_num, 0);
     694           0 :                         if (ret)
     695           0 :                                 bio_endio(comp_bio, ret);
     696             : 
     697           0 :                         bio_put(comp_bio);
     698             : 
     699           0 :                         comp_bio = compressed_bio_alloc(bdev, cur_disk_byte,
     700             :                                                         GFP_NOFS);
     701           0 :                         BUG_ON(!comp_bio);
     702           0 :                         comp_bio->bi_private = cb;
     703           0 :                         comp_bio->bi_end_io = end_compressed_bio_read;
     704             : 
     705           0 :                         bio_add_page(comp_bio, page, PAGE_CACHE_SIZE, 0);
     706             :                 }
     707          42 :                 cur_disk_byte += PAGE_CACHE_SIZE;
     708             :         }
     709          42 :         bio_get(comp_bio);
     710             : 
     711          42 :         ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0);
     712          42 :         BUG_ON(ret); /* -ENOMEM */
     713             : 
     714          42 :         if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
     715          42 :                 ret = btrfs_lookup_bio_sums(root, inode, comp_bio, sums);
     716          42 :                 BUG_ON(ret); /* -ENOMEM */
     717             :         }
     718             : 
     719          42 :         ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0);
     720          42 :         if (ret)
     721           0 :                 bio_endio(comp_bio, ret);
     722             : 
     723          42 :         bio_put(comp_bio);
     724          42 :         return 0;
     725             : 
     726             : fail2:
     727           0 :         while (faili >= 0) {
     728           0 :                 __free_page(cb->compressed_pages[faili]);
     729           0 :                 faili--;
     730             :         }
     731             : 
     732           0 :         kfree(cb->compressed_pages);
     733             : fail1:
     734           0 :         kfree(cb);
     735             : out:
     736           0 :         free_extent_map(em);
     737           0 :         return ret;
     738             : }
     739             : 
     740             : static struct list_head comp_idle_workspace[BTRFS_COMPRESS_TYPES];
     741             : static spinlock_t comp_workspace_lock[BTRFS_COMPRESS_TYPES];
     742             : static int comp_num_workspace[BTRFS_COMPRESS_TYPES];
     743             : static atomic_t comp_alloc_workspace[BTRFS_COMPRESS_TYPES];
     744             : static wait_queue_head_t comp_workspace_wait[BTRFS_COMPRESS_TYPES];
     745             : 
     746             : static struct btrfs_compress_op *btrfs_compress_op[] = {
     747             :         &btrfs_zlib_compress,
     748             :         &btrfs_lzo_compress,
     749             : };
     750             : 
     751           0 : void __init btrfs_init_compress(void)
     752             : {
     753             :         int i;
     754             : 
     755           0 :         for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
     756           0 :                 INIT_LIST_HEAD(&comp_idle_workspace[i]);
     757           0 :                 spin_lock_init(&comp_workspace_lock[i]);
     758           0 :                 atomic_set(&comp_alloc_workspace[i], 0);
     759           0 :                 init_waitqueue_head(&comp_workspace_wait[i]);
     760             :         }
     761           0 : }
     762             : 
     763             : /*
      764             :  * this finds an available workspace or allocates a new one.
     765             :  * ERR_PTR is returned if things go bad.
     766             :  */
     767         197 : static struct list_head *find_workspace(int type)
     768             : {
     769             :         struct list_head *workspace;
     770         197 :         int cpus = num_online_cpus();
     771         197 :         int idx = type - 1;
     772             : 
     773         197 :         struct list_head *idle_workspace        = &comp_idle_workspace[idx];
     774         197 :         spinlock_t *workspace_lock              = &comp_workspace_lock[idx];
     775         197 :         atomic_t *alloc_workspace               = &comp_alloc_workspace[idx];
     776         197 :         wait_queue_head_t *workspace_wait       = &comp_workspace_wait[idx];
     777             :         int *num_workspace                      = &comp_num_workspace[idx];
     778             : again:
     779             :         spin_lock(workspace_lock);
     780         197 :         if (!list_empty(idle_workspace)) {
     781             :                 workspace = idle_workspace->next;
     782         196 :                 list_del(workspace);
     783         196 :                 (*num_workspace)--;
     784             :                 spin_unlock(workspace_lock);
     785         196 :                 return workspace;
     786             : 
     787             :         }
     788           1 :         if (atomic_read(alloc_workspace) > cpus) {
     789           0 :                 DEFINE_WAIT(wait);
     790             : 
     791             :                 spin_unlock(workspace_lock);
     792           0 :                 prepare_to_wait(workspace_wait, &wait, TASK_UNINTERRUPTIBLE);
     793           0 :                 if (atomic_read(alloc_workspace) > cpus && !*num_workspace)
     794           0 :                         schedule();
     795           0 :                 finish_wait(workspace_wait, &wait);
     796             :                 goto again;
     797             :         }
     798             :         atomic_inc(alloc_workspace);
     799             :         spin_unlock(workspace_lock);
     800             : 
     801           1 :         workspace = btrfs_compress_op[idx]->alloc_workspace();
     802           1 :         if (IS_ERR(workspace)) {
     803             :                 atomic_dec(alloc_workspace);
     804           0 :                 wake_up(workspace_wait);
     805             :         }
     806           1 :         return workspace;
     807             : }
     808             : 
     809             : /*
     810             :  * put a workspace struct back on the list or free it if we have enough
     811             :  * idle ones sitting around
     812             :  */
     813         197 : static void free_workspace(int type, struct list_head *workspace)
     814             : {
     815         197 :         int idx = type - 1;
     816         197 :         struct list_head *idle_workspace        = &comp_idle_workspace[idx];
     817         197 :         spinlock_t *workspace_lock              = &comp_workspace_lock[idx];
     818         197 :         atomic_t *alloc_workspace               = &comp_alloc_workspace[idx];
     819         197 :         wait_queue_head_t *workspace_wait       = &comp_workspace_wait[idx];
     820             :         int *num_workspace                      = &comp_num_workspace[idx];
     821             : 
     822             :         spin_lock(workspace_lock);
     823         394 :         if (*num_workspace < num_online_cpus()) {
     824             :                 list_add(workspace, idle_workspace);
     825         197 :                 (*num_workspace)++;
     826             :                 spin_unlock(workspace_lock);
     827             :                 goto wake;
     828             :         }
     829             :         spin_unlock(workspace_lock);
     830             : 
     831           0 :         btrfs_compress_op[idx]->free_workspace(workspace);
     832             :         atomic_dec(alloc_workspace);
     833             : wake:
     834         197 :         smp_mb();
     835         197 :         if (waitqueue_active(workspace_wait))
     836           0 :                 wake_up(workspace_wait);
     837         197 : }
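
find_workspace() and free_workspace() above implement a small bounded pool: reuse an idle workspace when one is on the list, allocate a fresh one as long as the total stays around the number of online CPUs, and otherwise sleep until somebody returns one. A minimal userspace sketch of that policy follows, using a pthread mutex and condition variable where the kernel uses a spinlock and waitqueue; the names and the keep-everything-idle simplification in put_workspace() are mine, not btrfs's (the kernel frees workspaces beyond the CPU count):

    #include <pthread.h>
    #include <stdlib.h>
    #include <unistd.h>

    struct ws { struct ws *next; };         /* stand-in for a compression workspace */

    static struct ws *idle_list;            /* idle workspaces ready for reuse */
    static long nr_alloc;                   /* workspaces in existence */
    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;

    static struct ws *get_workspace(void)
    {
            long cpus = sysconf(_SC_NPROCESSORS_ONLN);
            struct ws *w;

            pthread_mutex_lock(&lock);
            /* nothing idle and already "enough" allocated: wait for a return */
            while (!idle_list && nr_alloc > cpus)
                    pthread_cond_wait(&cond, &lock);
            if (idle_list) {
                    w = idle_list;
                    idle_list = w->next;
            } else {
                    nr_alloc++;
                    w = calloc(1, sizeof(*w));
            }
            pthread_mutex_unlock(&lock);
            return w;
    }

    static void put_workspace(struct ws *w)
    {
            pthread_mutex_lock(&lock);
            w->next = idle_list;            /* simplification: always keep it idle */
            idle_list = w;
            pthread_mutex_unlock(&lock);
            pthread_cond_signal(&cond);
    }

    int main(void)
    {
            struct ws *w = get_workspace();

            put_workspace(w);
            return 0;
    }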
     838             : 
     839             : /*
     840             :  * cleanup function for module exit
     841             :  */
     842           0 : static void free_workspaces(void)
     843             : {
     844             :         struct list_head *workspace;
     845             :         int i;
     846             : 
     847           0 :         for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
     848           0 :                 while (!list_empty(&comp_idle_workspace[i])) {
     849           0 :                         workspace = comp_idle_workspace[i].next;
     850           0 :                         list_del(workspace);
     851           0 :                         btrfs_compress_op[i]->free_workspace(workspace);
     852           0 :                         atomic_dec(&comp_alloc_workspace[i]);
     853             :                 }
     854             :         }
     855           0 : }
     856             : 
     857             : /*
     858             :  * given an address space and start/len, compress the bytes.
     859             :  *
     860             :  * pages are allocated to hold the compressed result and stored
     861             :  * in 'pages'
     862             :  *
     863             :  * out_pages is used to return the number of pages allocated.  There
     864             :  * may be pages allocated even if we return an error
     865             :  *
     866             :  * total_in is used to return the number of bytes actually read.  It
      867             :  * may be smaller than len if we had to exit early because we
      868             :  * ran out of room in the pages array or because we crossed the
     869             :  * max_out threshold.
     870             :  *
     871             :  * total_out is used to return the total number of compressed bytes
     872             :  *
     873             :  * max_out tells us the max number of bytes that we're allowed to
     874             :  * stuff into pages
     875             :  */
     876         155 : int btrfs_compress_pages(int type, struct address_space *mapping,
     877             :                          u64 start, unsigned long len,
     878             :                          struct page **pages,
     879             :                          unsigned long nr_dest_pages,
     880             :                          unsigned long *out_pages,
     881             :                          unsigned long *total_in,
     882             :                          unsigned long *total_out,
     883             :                          unsigned long max_out)
     884             : {
     885             :         struct list_head *workspace;
     886             :         int ret;
     887             : 
     888         155 :         workspace = find_workspace(type);
     889         155 :         if (IS_ERR(workspace))
     890           0 :                 return PTR_ERR(workspace);
     891             : 
     892         155 :         ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping,
     893             :                                                       start, len, pages,
     894             :                                                       nr_dest_pages, out_pages,
     895             :                                                       total_in, total_out,
     896             :                                                       max_out);
     897         155 :         free_workspace(type, workspace);
     898         155 :         return ret;
     899             : }
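
In practice the interesting outputs of btrfs_compress_pages() are the counters: if *total_in comes back smaller than len, the compression op stopped early (it filled nr_dest_pages or hit max_out), and a caller can compare *total_out against *total_in to decide whether compression actually saved space before committing to a compressed write. The writeback path in inode.c makes that kind of decision; the exact policy lives in the caller, not here.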
     900             : 
     901             : /*
     902             :  * pages_in is an array of pages with compressed data.
     903             :  *
     904             :  * disk_start is the starting logical offset of this array in the file
     905             :  *
     906             :  * bvec is a bio_vec of pages from the file that we want to decompress into
     907             :  *
     908             :  * vcnt is the count of pages in the biovec
     909             :  *
     910             :  * srclen is the number of bytes in pages_in
     911             :  *
     912             :  * The basic idea is that we have a bio that was created by readpages.
     913             :  * The pages in the bio are for the uncompressed data, and they may not
     914             :  * be contiguous.  They all correspond to the range of bytes covered by
     915             :  * the compressed extent.
     916             :  */
     917          42 : static int btrfs_decompress_biovec(int type, struct page **pages_in,
     918             :                                    u64 disk_start, struct bio_vec *bvec,
     919             :                                    int vcnt, size_t srclen)
     920             : {
     921             :         struct list_head *workspace;
     922             :         int ret;
     923             : 
     924          42 :         workspace = find_workspace(type);
     925          42 :         if (IS_ERR(workspace))
     926           0 :                 return PTR_ERR(workspace);
     927             : 
     928          42 :         ret = btrfs_compress_op[type-1]->decompress_biovec(workspace, pages_in,
     929             :                                                          disk_start,
     930             :                                                          bvec, vcnt, srclen);
     931          42 :         free_workspace(type, workspace);
     932          42 :         return ret;
     933             : }
     934             : 
     935             : /*
     936             :  * a less complex decompression routine.  Our compressed data fits in a
     937             :  * single page, and we want to read a single page out of it.
      938             :  * start_byte tells us the offset into the uncompressed data we're interested in
     939             :  */
     940           0 : int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
     941             :                      unsigned long start_byte, size_t srclen, size_t destlen)
     942             : {
     943             :         struct list_head *workspace;
     944             :         int ret;
     945             : 
     946           0 :         workspace = find_workspace(type);
     947           0 :         if (IS_ERR(workspace))
     948           0 :                 return PTR_ERR(workspace);
     949             : 
     950           0 :         ret = btrfs_compress_op[type-1]->decompress(workspace, data_in,
     951             :                                                   dest_page, start_byte,
     952             :                                                   srclen, destlen);
     953             : 
     954           0 :         free_workspace(type, workspace);
     955           0 :         return ret;
     956             : }
     957             : 
     958           0 : void btrfs_exit_compress(void)
     959             : {
     960           0 :         free_workspaces();
     961           0 : }
     962             : 
     963             : /*
     964             :  * Copy uncompressed data from working buffer to pages.
     965             :  *
      966             :  * buf_start is the byte offset, within the decompressed data, at which our working buffer starts.
      967             :  *
      968             :  * total_out is the offset just past the last decompressed byte produced so far
     969             :  */
     970         364 : int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
     971             :                               unsigned long total_out, u64 disk_start,
     972             :                               struct bio_vec *bvec, int vcnt,
     973             :                               unsigned long *pg_index,
     974             :                               unsigned long *pg_offset)
     975             : {
     976             :         unsigned long buf_offset;
     977             :         unsigned long current_buf_start;
     978             :         unsigned long start_byte;
     979         364 :         unsigned long working_bytes = total_out - buf_start;
     980             :         unsigned long bytes;
     981             :         char *kaddr;
     982         654 :         struct page *page_out = bvec[*pg_index].bv_page;
     983             : 
     984             :         /*
     985             :          * start byte is the first byte of the page we're currently
     986             :          * copying into relative to the start of the compressed data.
     987             :          */
     988         364 :         start_byte = page_offset(page_out) - disk_start;
     989             : 
     990             :         /* we haven't yet hit data corresponding to this page */
     991         364 :         if (total_out <= start_byte)
     992             :                 return 1;
     993             : 
     994             :         /*
     995             :          * the start of the data we care about is offset into
     996             :          * the middle of our working buffer
     997             :          */
     998         332 :         if (total_out > start_byte && buf_start < start_byte) {
     999           0 :                 buf_offset = start_byte - buf_start;
    1000           0 :                 working_bytes -= buf_offset;
    1001             :         } else {
    1002             :                 buf_offset = 0;
    1003             :         }
    1004             :         current_buf_start = buf_start;
    1005             : 
    1006             :         /* copy bytes from the working buffer into the pages */
    1007         694 :         while (working_bytes > 0) {
    1008         332 :                 bytes = min(PAGE_CACHE_SIZE - *pg_offset,
    1009             :                             PAGE_CACHE_SIZE - buf_offset);
    1010         332 :                 bytes = min(bytes, working_bytes);
    1011             :                 kaddr = kmap_atomic(page_out);
    1012         332 :                 memcpy(kaddr + *pg_offset, buf + buf_offset, bytes);
    1013         332 :                 if (*pg_index == (vcnt - 1) && *pg_offset == 0)
    1014          42 :                         memset(kaddr + bytes, 0, PAGE_CACHE_SIZE - bytes);
    1015             :                 kunmap_atomic(kaddr);
    1016             :                 flush_dcache_page(page_out);
    1017             : 
    1018         332 :                 *pg_offset += bytes;
    1019         332 :                 buf_offset += bytes;
    1020         332 :                 working_bytes -= bytes;
    1021         332 :                 current_buf_start += bytes;
    1022             : 
    1023             :                 /* check if we need to pick another page */
    1024         332 :                 if (*pg_offset == PAGE_CACHE_SIZE) {
    1025         302 :                         (*pg_index)++;
    1026         302 :                         if (*pg_index >= vcnt)
    1027             :                                 return 0;
    1028             : 
    1029         290 :                         page_out = bvec[*pg_index].bv_page;
    1030         290 :                         *pg_offset = 0;
    1031         290 :                         start_byte = page_offset(page_out) - disk_start;
    1032             : 
    1033             :                         /*
    1034             :                          * make sure our new page is covered by this
    1035             :                          * working buffer
    1036             :                          */
    1037         290 :                         if (total_out <= start_byte)
    1038             :                                 return 1;
    1039             : 
    1040             :                         /*
    1041             :                          * the next page in the biovec might not be adjacent
    1042             :                          * to the last page, but it might still be found
    1043             :                          * inside this working buffer. bump our offset pointer
    1044             :                          */
    1045           0 :                         if (total_out > start_byte &&
    1046           0 :                             current_buf_start < start_byte) {
    1047           0 :                                 buf_offset = start_byte - buf_start;
    1048           0 :                                 working_bytes = total_out - start_byte;
    1049             :                                 current_buf_start = buf_start + buf_offset;
    1050             :                         }
    1051             :                 }
    1052             :         }
    1053             : 
    1054             :         return 1;
    1055             : }
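
A small worked example of the offset bookkeeping above, assuming 4 KiB pages: if the current output page sits 8 KiB into the extent (start_byte = 8192) and this call delivers decompressed bytes [0, 4096) (buf_start = 0, total_out = 4096), then total_out <= start_byte and the function returns 1, telling the caller to keep decompressing before anything can be copied. When a later call delivers bytes [8192, 12288), buf_offset stays 0, working_bytes is 4096, and the min() clamping copies exactly one page before *pg_index advances to the next bvec page.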

Generated by: LCOV version 1.10