LCOV - code coverage report
Current view: top level - fs/btrfs - volumes.c (source / functions) Hit Total Coverage
Test: btrfstest.info Lines: 1528 2704 56.5 %
Date: 2014-11-28 Functions: 79 115 68.7 %

          Line data    Source code
       1             : /*
       2             :  * Copyright (C) 2007 Oracle.  All rights reserved.
       3             :  *
       4             :  * This program is free software; you can redistribute it and/or
       5             :  * modify it under the terms of the GNU General Public
       6             :  * License v2 as published by the Free Software Foundation.
       7             :  *
       8             :  * This program is distributed in the hope that it will be useful,
       9             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      10             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      11             :  * General Public License for more details.
      12             :  *
      13             :  * You should have received a copy of the GNU General Public
      14             :  * License along with this program; if not, write to the
      15             :  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
      16             :  * Boston, MA 02111-1307, USA.
      17             :  */
      18             : #include <linux/sched.h>
      19             : #include <linux/bio.h>
      20             : #include <linux/slab.h>
      21             : #include <linux/buffer_head.h>
      22             : #include <linux/blkdev.h>
      23             : #include <linux/random.h>
      24             : #include <linux/iocontext.h>
      25             : #include <linux/capability.h>
      26             : #include <linux/ratelimit.h>
      27             : #include <linux/kthread.h>
      28             : #include <linux/raid/pq.h>
      29             : #include <linux/semaphore.h>
      30             : #include <asm/div64.h>
      31             : #include "ctree.h"
      32             : #include "extent_map.h"
      33             : #include "disk-io.h"
      34             : #include "transaction.h"
      35             : #include "print-tree.h"
      36             : #include "volumes.h"
      37             : #include "raid56.h"
      38             : #include "async-thread.h"
      39             : #include "check-integrity.h"
      40             : #include "rcu-string.h"
      41             : #include "math.h"
      42             : #include "dev-replace.h"
      43             : #include "sysfs.h"
      44             : 
      45             : static int init_first_rw_device(struct btrfs_trans_handle *trans,
      46             :                                 struct btrfs_root *root,
      47             :                                 struct btrfs_device *device);
      48             : static int btrfs_relocate_sys_chunks(struct btrfs_root *root);
      49             : static void __btrfs_reset_dev_stats(struct btrfs_device *dev);
      50             : static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev);
      51             : static void btrfs_dev_stat_print_on_load(struct btrfs_device *device);
      52             : 
      53             : static DEFINE_MUTEX(uuid_mutex);
      54             : static LIST_HEAD(fs_uuids);
      55             : 
      56             : static void lock_chunks(struct btrfs_root *root)
      57             : {
      58         293 :         mutex_lock(&root->fs_info->chunk_mutex);
      59             : }
      60             : 
      61             : static void unlock_chunks(struct btrfs_root *root)
      62             : {
      63         293 :         mutex_unlock(&root->fs_info->chunk_mutex);
      64             : }
      65             : 
      66         106 : static struct btrfs_fs_devices *__alloc_fs_devices(void)
      67             : {
      68             :         struct btrfs_fs_devices *fs_devs;
      69             : 
      70         106 :         fs_devs = kzalloc(sizeof(*fs_devs), GFP_NOFS);
      71         106 :         if (!fs_devs)
      72             :                 return ERR_PTR(-ENOMEM);
      73             : 
      74         106 :         mutex_init(&fs_devs->device_list_mutex);
      75             : 
      76         106 :         INIT_LIST_HEAD(&fs_devs->devices);
      77         106 :         INIT_LIST_HEAD(&fs_devs->alloc_list);
      78         106 :         INIT_LIST_HEAD(&fs_devs->list);
      79             : 
      80         106 :         return fs_devs;
      81             : }
      82             : 
      83             : /**
      84             :  * alloc_fs_devices - allocate struct btrfs_fs_devices
      85             :  * @fsid:       a pointer to UUID for this FS.  If NULL a new UUID is
      86             :  *              generated.
      87             :  *
      88             :  * Return: a pointer to a new &struct btrfs_fs_devices on success;
      89             :  * ERR_PTR() on error.  Returned struct is not linked onto any lists and
      90             :  * can be destroyed with kfree() right away.
      91             :  */
      92         106 : static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid)
      93             : {
      94             :         struct btrfs_fs_devices *fs_devs;
      95             : 
      96         106 :         fs_devs = __alloc_fs_devices();
      97         106 :         if (IS_ERR(fs_devs))
      98             :                 return fs_devs;
      99             : 
     100         106 :         if (fsid)
     101         106 :                 memcpy(fs_devs->fsid, fsid, BTRFS_FSID_SIZE);
     102             :         else
     103           0 :                 generate_random_uuid(fs_devs->fsid);
     104             : 
     105             :         return fs_devs;
     106             : }
     107             : 
     108           0 : static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
     109             : {
     110             :         struct btrfs_device *device;
     111           0 :         WARN_ON(fs_devices->opened);
     112           0 :         while (!list_empty(&fs_devices->devices)) {
     113             :                 device = list_entry(fs_devices->devices.next,
     114             :                                     struct btrfs_device, dev_list);
     115           0 :                 list_del(&device->dev_list);
     116           0 :                 rcu_string_free(device->name);
     117           0 :                 kfree(device);
     118             :         }
     119           0 :         kfree(fs_devices);
     120           0 : }
     121             : 
     122           0 : static void btrfs_kobject_uevent(struct block_device *bdev,
     123             :                                  enum kobject_action action)
     124             : {
     125             :         int ret;
     126             : 
     127           0 :         ret = kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, action);
     128           0 :         if (ret)
     129           0 :                 pr_warn("BTRFS: Sending event '%d' to kobject: '%s' (%p): failed\n",
     130             :                         action,
     131             :                         kobject_name(&disk_to_dev(bdev->bd_disk)->kobj),
     132             :                         &disk_to_dev(bdev->bd_disk)->kobj);
     133           0 : }
     134             : 
     135           0 : void btrfs_cleanup_fs_uuids(void)
     136             : {
     137             :         struct btrfs_fs_devices *fs_devices;
     138             : 
     139           0 :         while (!list_empty(&fs_uuids)) {
     140           0 :                 fs_devices = list_entry(fs_uuids.next,
     141             :                                         struct btrfs_fs_devices, list);
     142           0 :                 list_del(&fs_devices->list);
     143           0 :                 free_fs_devices(fs_devices);
     144             :         }
     145           0 : }
     146             : 
     147         382 : static struct btrfs_device *__alloc_device(void)
     148             : {
     149             :         struct btrfs_device *dev;
     150             : 
     151         382 :         dev = kzalloc(sizeof(*dev), GFP_NOFS);
     152         382 :         if (!dev)
     153             :                 return ERR_PTR(-ENOMEM);
     154             : 
     155         382 :         INIT_LIST_HEAD(&dev->dev_list);
     156         382 :         INIT_LIST_HEAD(&dev->dev_alloc_list);
     157             : 
     158         382 :         spin_lock_init(&dev->io_lock);
     159             : 
     160         382 :         spin_lock_init(&dev->reada_lock);
     161             :         atomic_set(&dev->reada_in_flight, 0);
     162         382 :         INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_WAIT);
     163         382 :         INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_WAIT);
     164             : 
     165         382 :         return dev;
     166             : }
     167             : 
     168        2480 : static noinline struct btrfs_device *__find_device(struct list_head *head,
     169             :                                                    u64 devid, u8 *uuid)
     170             : {
     171             :         struct btrfs_device *dev;
     172             : 
     173        2994 :         list_for_each_entry(dev, head, dev_list) {
     174        2950 :                 if (dev->devid == devid &&
     175        2359 :                     (!uuid || !memcmp(dev->uuid, uuid, BTRFS_UUID_SIZE))) {
     176             :                         return dev;
     177             :                 }
     178             :         }
     179             :         return NULL;
     180             : }
     181             : 
     182         613 : static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid)
     183             : {
     184             :         struct btrfs_fs_devices *fs_devices;
     185             : 
     186       53577 :         list_for_each_entry(fs_devices, &fs_uuids, list) {
     187       53471 :                 if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0)
     188             :                         return fs_devices;
     189             :         }
     190             :         return NULL;
     191             : }
     192             : 
     193             : static int
     194         256 : btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder,
     195             :                       int flush, struct block_device **bdev,
     196             :                       struct buffer_head **bh)
     197             : {
     198             :         int ret;
     199             : 
     200         256 :         *bdev = blkdev_get_by_path(device_path, flags, holder);
     201             : 
     202         256 :         if (IS_ERR(*bdev)) {
     203           0 :                 ret = PTR_ERR(*bdev);
     204           0 :                 printk(KERN_INFO "BTRFS: open %s failed\n", device_path);
     205           0 :                 goto error;
     206             :         }
     207             : 
     208         256 :         if (flush)
     209         248 :                 filemap_write_and_wait((*bdev)->bd_inode->i_mapping);
     210         256 :         ret = set_blocksize(*bdev, 4096);
     211         256 :         if (ret) {
     212           0 :                 blkdev_put(*bdev, flags);
     213           0 :                 goto error;
     214             :         }
     215         256 :         invalidate_bdev(*bdev);
     216         256 :         *bh = btrfs_read_dev_super(*bdev);
     217         256 :         if (!*bh) {
     218             :                 ret = -EINVAL;
     219           0 :                 blkdev_put(*bdev, flags);
     220           0 :                 goto error;
     221             :         }
     222             : 
     223             :         return 0;
     224             : 
     225             : error:
     226           0 :         *bdev = NULL;
     227           0 :         *bh = NULL;
     228           0 :         return ret;
     229             : }
     230             : 
     231             : static void requeue_list(struct btrfs_pending_bios *pending_bios,
     232             :                         struct bio *head, struct bio *tail)
     233             : {
     234             : 
     235             :         struct bio *old_head;
     236             : 
     237           0 :         old_head = pending_bios->head;
     238           0 :         pending_bios->head = head;
     239           0 :         if (pending_bios->tail)
     240           0 :                 tail->bi_next = old_head;
     241             :         else
     242           0 :                 pending_bios->tail = tail;
     243             : }
     244             : 
/*
 * we try to collect pending bios for a device so we don't get a large
 * number of procs sending bios down to the same device.  This greatly
 * improves the scheduler's ability to collect and merge the bios.
 *
 * But, it also turns into a long list of bios to process and that is sure
 * to eventually make the worker thread block.  The solution here is to
 * make some progress and then put this work struct back at the end of
 * the list if the block device is congested.  This way, multiple devices
 * can make progress from a single worker thread.
 *
 * The function drains device->pending_sync_bios and device->pending_bios,
 * submitting each bio through btrfsic_submit_bio().  The lists are only
 * touched with device->io_lock held; submission happens with it dropped.
 */
static noinline void run_scheduled_bios(struct btrfs_device *device)
{
        struct bio *pending;
        struct backing_dev_info *bdi;
        struct btrfs_fs_info *fs_info;
        struct btrfs_pending_bios *pending_bios;
        struct bio *tail;
        struct bio *cur;
        int again = 0;
        /* bios submitted since the last (re)selection of a list */
        unsigned long num_run;
        /* bios submitted during this whole invocation */
        unsigned long batch_run = 0;
        unsigned long limit;
        unsigned long last_waited = 0;
        /* set while the sync list is being drained, see list selection below */
        int force_reg = 0;
        int sync_pending = 0;
        struct blk_plug plug;

        /*
         * this function runs all the bios we've collected for
         * a particular device.  We don't want to wander off to
         * another device without first sending all of these down.
         * So, setup a plug here and finish it off before we return
         */
        blk_start_plug(&plug);

        bdi = blk_get_backing_dev_info(device->bdev);
        fs_info = device->dev_root->fs_info;
        /*
         * waiters on async_submit_wait are woken below once nr_async_bios
         * drops under two thirds of the async submit limit
         */
        limit = btrfs_async_submit_limit(fs_info);
        limit = limit * 2 / 3;

loop:
        spin_lock(&device->io_lock);

        /* every jump to loop_lock arrives with device->io_lock already held */
loop_lock:
        num_run = 0;

        /* take all the bios off the list at once and process them
         * later on (without the lock held).  But, remember the
         * tail and other pointers so the bios can be properly reinserted
         * into the list if we hit congestion
         */
        /*
         * pick the sync list if it has entries and was not the list picked
         * on the previous pass; otherwise pick the regular list
         */
        if (!force_reg && device->pending_sync_bios.head) {
                pending_bios = &device->pending_sync_bios;
                force_reg = 1;
        } else {
                pending_bios = &device->pending_bios;
                force_reg = 0;
        }

        pending = pending_bios->head;
        tail = pending_bios->tail;
        WARN_ON(pending && !tail);

        /*
         * if pending was null this time around, no bios need processing
         * at all and we can stop.  Otherwise it'll loop back up again
         * and do an additional check so no bios are missed.
         *
         * device->running_pending is used to synchronize with the
         * schedule_bio code.
         */
        if (device->pending_sync_bios.head == NULL &&
            device->pending_bios.head == NULL) {
                again = 0;
                device->running_pending = 0;
        } else {
                again = 1;
                device->running_pending = 1;
        }

        /* the chosen list is now owned locally through pending/tail */
        pending_bios->head = NULL;
        pending_bios->tail = NULL;

        spin_unlock(&device->io_lock);

        while (pending) {

                /*
                 * NOTE(review): presumably orders the unlocked reads of the
                 * list heads below against writers - confirm the pairing
                 */
                rmb();
                /* we want to work on both lists, but do more bios on the
                 * sync list than the regular list
                 */
                if ((num_run > 32 &&
                    pending_bios != &device->pending_sync_bios &&
                    device->pending_sync_bios.head) ||
                   (num_run > 64 && pending_bios == &device->pending_sync_bios &&
                    device->pending_bios.head)) {
                        /*
                         * put the unsubmitted remainder back and choose a
                         * list again; the lock stays held across the jump
                         */
                        spin_lock(&device->io_lock);
                        requeue_list(pending_bios, pending, tail);
                        goto loop_lock;
                }

                /* detach the first bio from the local chain */
                cur = pending;
                pending = pending->bi_next;
                cur->bi_next = NULL;

                if (atomic_dec_return(&fs_info->nr_async_bios) < limit &&
                    waitqueue_active(&fs_info->async_submit_wait))
                        wake_up(&fs_info->async_submit_wait);

                BUG_ON(atomic_read(&cur->bi_cnt) == 0);

                /*
                 * if we're doing the sync list, record that our
                 * plug has some sync requests on it
                 *
                 * If we're doing the regular list and there are
                 * sync requests sitting around, unplug before
                 * we add more
                 */
                if (pending_bios == &device->pending_sync_bios) {
                        sync_pending = 1;
                } else if (sync_pending) {
                        blk_finish_plug(&plug);
                        blk_start_plug(&plug);
                        sync_pending = 0;
                }

                btrfsic_submit_bio(cur->bi_rw, cur);
                num_run++;
                batch_run++;
                if (need_resched())
                        cond_resched();

                /*
                 * we made progress, there is more work to do and the bdi
                 * is now congested.  Back off and let other work structs
                 * run instead
                 */
                if (pending && bdi_write_congested(bdi) && batch_run > 8 &&
                    fs_info->fs_devices->open_devices > 1) {
                        struct io_context *ioc;

                        ioc = current->io_context;

                        /*
                         * the main goal here is that we don't want to
                         * block if we're going to be able to submit
                         * more requests without blocking.
                         *
                         * This code does two great things, it pokes into
                         * the elevator code from a filesystem _and_
                         * it makes assumptions about how batching works.
                         */
                        if (ioc && ioc->nr_batch_requests > 0 &&
                            time_before(jiffies, ioc->last_waited + HZ/50UL) &&
                            (last_waited == 0 ||
                             ioc->last_waited == last_waited)) {
                                /*
                                 * we want to go through our batch of
                                 * requests and stop.  So, we copy out
                                 * the ioc->last_waited time and test
                                 * against it before looping
                                 */
                                last_waited = ioc->last_waited;
                                if (need_resched())
                                        cond_resched();
                                continue;
                        }
                        /*
                         * hand the unsubmitted remainder back to the list,
                         * queue this device's work item again and stop
                         */
                        spin_lock(&device->io_lock);
                        requeue_list(pending_bios, pending, tail);
                        device->running_pending = 1;

                        spin_unlock(&device->io_lock);
                        btrfs_queue_work(fs_info->submit_workers,
                                         &device->work);
                        goto done;
                }
                /* unplug every 64 requests just for good measure */
                if (batch_run % 64 == 0) {
                        blk_finish_plug(&plug);
                        blk_start_plug(&plug);
                        sync_pending = 0;
                }
        }

        cond_resched();
        if (again)
                goto loop;

        /* final check under the lock for bios queued in the meantime */
        spin_lock(&device->io_lock);
        if (device->pending_bios.head || device->pending_sync_bios.head)
                goto loop_lock;
        spin_unlock(&device->io_lock);

done:
        blk_finish_plug(&plug);
}
     443             : 
     444        7328 : static void pending_bios_fn(struct btrfs_work *work)
     445             : {
     446             :         struct btrfs_device *device;
     447             : 
     448        7328 :         device = container_of(work, struct btrfs_device, work);
     449        7328 :         run_scheduled_bios(device);
     450        7328 : }
     451             : 
     452             : /*
     453             :  * Add new device to list of registered devices
     454             :  *
     455             :  * Returns:
     456             :  * 1   - first time device is seen
     457             :  * 0   - device already known
     458             :  * < 0 - error
     459             :  */
     460         613 : static noinline int device_list_add(const char *path,
     461             :                            struct btrfs_super_block *disk_super,
     462             :                            u64 devid, struct btrfs_fs_devices **fs_devices_ret)
     463             : {
     464             :         struct btrfs_device *device;
     465             :         struct btrfs_fs_devices *fs_devices;
     466             :         struct rcu_string *name;
     467             :         int ret = 0;
     468             :         u64 found_transid = btrfs_super_generation(disk_super);
     469             : 
     470         613 :         fs_devices = find_fsid(disk_super->fsid);
     471         613 :         if (!fs_devices) {
     472         106 :                 fs_devices = alloc_fs_devices(disk_super->fsid);
     473         106 :                 if (IS_ERR(fs_devices))
     474           0 :                         return PTR_ERR(fs_devices);
     475             : 
     476         106 :                 list_add(&fs_devices->list, &fs_uuids);
     477         106 :                 fs_devices->latest_devid = devid;
     478         106 :                 fs_devices->latest_trans = found_transid;
     479             : 
     480             :                 device = NULL;
     481             :         } else {
     482         507 :                 device = __find_device(&fs_devices->devices, devid,
     483         507 :                                        disk_super->dev_item.uuid);
     484             :         }
     485         613 :         if (!device) {
     486         126 :                 if (fs_devices->opened)
     487             :                         return -EBUSY;
     488             : 
     489         126 :                 device = btrfs_alloc_device(NULL, &devid,
     490         126 :                                             disk_super->dev_item.uuid);
     491         126 :                 if (IS_ERR(device)) {
     492             :                         /* we can safely leave the fs_devices entry around */
     493           0 :                         return PTR_ERR(device);
     494             :                 }
     495             : 
     496         126 :                 name = rcu_string_strdup(path, GFP_NOFS);
     497         126 :                 if (!name) {
     498           0 :                         kfree(device);
     499           0 :                         return -ENOMEM;
     500             :                 }
     501         126 :                 rcu_assign_pointer(device->name, name);
     502             : 
     503         126 :                 mutex_lock(&fs_devices->device_list_mutex);
     504         126 :                 list_add_rcu(&device->dev_list, &fs_devices->devices);
     505         126 :                 fs_devices->num_devices++;
     506         126 :                 mutex_unlock(&fs_devices->device_list_mutex);
     507             : 
     508             :                 ret = 1;
     509         126 :                 device->fs_devices = fs_devices;
     510         487 :         } else if (!device->name || strcmp(device->name->str, path)) {
     511             :                 /*
     512             :                  * When FS is already mounted.
     513             :                  * 1. If you are here and if the device->name is NULL that
     514             :                  *    means this device was missing at time of FS mount.
     515             :                  * 2. If you are here and if the device->name is different
     516             :                  *    from 'path' that means either
     517             :                  *      a. The same device disappeared and reappeared with
     518             :                  *         different name. or
     519             :                  *      b. The missing-disk-which-was-replaced, has
     520             :                  *         reappeared now.
     521             :                  *
     522             :                  * We must allow 1 and 2a above. But 2b would be a spurious
     523             :                  * and unintentional.
     524             :                  *
     525             :                  * Further in case of 1 and 2a above, the disk at 'path'
     526             :                  * would have missed some transaction when it was away and
     527             :                  * in case of 2a the stale bdev has to be updated as well.
     528             :                  * 2b must not be allowed at all time.
     529             :                  */
     530             : 
     531             :                 /*
     532             :                  * For now, we do allow update to btrfs_fs_device through the
     533             :                  * btrfs dev scan cli after FS has been mounted.  We're still
     534             :                  * tracking a problem where systems fail mount by subvolume id
     535             :                  * when we reject replacement on a mounted FS.
     536             :                  */
     537           0 :                 if (!fs_devices->opened && found_transid < device->generation) {
     538             :                         /*
     539             :                          * That is if the FS is _not_ mounted and if you
     540             :                          * are here, that means there is more than one
     541             :                          * disk with same uuid and devid.We keep the one
     542             :                          * with larger generation number or the last-in if
     543             :                          * generation are equal.
     544             :                          */
     545             :                         return -EEXIST;
     546             :                 }
     547             : 
     548           0 :                 name = rcu_string_strdup(path, GFP_NOFS);
     549           0 :                 if (!name)
     550             :                         return -ENOMEM;
     551           0 :                 rcu_string_free(device->name);
     552           0 :                 rcu_assign_pointer(device->name, name);
     553           0 :                 if (device->missing) {
     554           0 :                         fs_devices->missing_devices--;
     555           0 :                         device->missing = 0;
     556             :                 }
     557             :         }
     558             : 
     559             :         /*
     560             :          * Unmount does not free the btrfs_device struct but would zero
     561             :          * generation along with most of the other members. So just update
     562             :          * it back. We need it to pick the disk with largest generation
     563             :          * (as above).
     564             :          */
     565         613 :         if (!fs_devices->opened)
     566         498 :                 device->generation = found_transid;
     567             : 
     568         613 :         if (found_transid > fs_devices->latest_trans) {
     569         312 :                 fs_devices->latest_devid = devid;
     570         312 :                 fs_devices->latest_trans = found_transid;
     571             :         }
     572         613 :         *fs_devices_ret = fs_devices;
     573             : 
     574         613 :         return ret;
     575             : }
     576             : /* Build a copy of @orig: a new btrfs_fs_devices for the same fsid holding one newly allocated device (same devid, uuid and, if set, name) per entry on orig->devices. Returns the copy, the ERR_PTR from alloc_fs_devices(), or ERR_PTR(-ENOMEM). */
     577           0 : static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
     578             : {
     579             :         struct btrfs_fs_devices *fs_devices;
     580             :         struct btrfs_device *device;
     581             :         struct btrfs_device *orig_dev;
     582             : 
     583           0 :         fs_devices = alloc_fs_devices(orig->fsid);
     584           0 :         if (IS_ERR(fs_devices))
     585             :                 return fs_devices;
     586             : 
     587           0 :         fs_devices->latest_devid = orig->latest_devid;
     588           0 :         fs_devices->latest_trans = orig->latest_trans;
     589           0 :         fs_devices->total_devices = orig->total_devices;
     590             : 
     591             :         /* We have held the volume lock, it is safe to get the devices. */
     592           0 :         list_for_each_entry(orig_dev, &orig->devices, dev_list) {
     593             :                 struct rcu_string *name;
     594             : 
     595           0 :                 device = btrfs_alloc_device(NULL, &orig_dev->devid,
     596           0 :                                             orig_dev->uuid);
     597           0 :                 if (IS_ERR(device))
     598             :                         goto error;
     599             : 
     600             :                 /*
     601             :                  * This is ok to do without rcu read locked because we hold the
     602             :                  * uuid mutex so nothing we touch in here is going to disappear.
     603             :                  */
     604           0 :                 if (orig_dev->name) {
     605           0 :                         name = rcu_string_strdup(orig_dev->name->str, GFP_NOFS);
     606           0 :                         if (!name) {
     607           0 :                                 kfree(device); /* not linked into fs_devices->devices yet, so it is freed here */
     608           0 :                                 goto error;
     609             :                         }
     610           0 :                         rcu_assign_pointer(device->name, name);
     611             :                 }
     612             : 
     613           0 :                 list_add(&device->dev_list, &fs_devices->devices);
     614           0 :                 device->fs_devices = fs_devices;
     615           0 :                 fs_devices->num_devices++;
     616             :         }
     617             :         return fs_devices;
     618             : error:
     619           0 :         free_fs_devices(fs_devices);
     620           0 :         return ERR_PTR(-ENOMEM); /* every failure inside the loop is reported as -ENOMEM */
     621             : }
     622             : /* Under uuid_mutex, remove and free every device not flagged in_fs_metadata from @fs_devices and from each fs_devices reached through ->seed, while tracking the highest-generation kept device that is not a dev-replace target. @step: 0 keeps any device whose devid is BTRFS_DEV_REPLACE_DEVID; otherwise such a device is kept only if is_tgtdev_for_dev_replace is set. @fs_info is not referenced in this function. */
     623         442 : void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info,
     624             :                                struct btrfs_fs_devices *fs_devices, int step)
     625             : {
     626             :         struct btrfs_device *device, *next;
     627             : 
     628             :         struct block_device *latest_bdev = NULL;
     629             :         u64 latest_devid = 0;
     630             :         u64 latest_transid = 0;
     631             : 
     632         442 :         mutex_lock(&uuid_mutex);
     633             : again:
     634             :         /* This is the initialized path, it is safe to release the devices. */
     635         938 :         list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
     636         496 :                 if (device->in_fs_metadata) {
     637         496 :                         if (!device->is_tgtdev_for_dev_replace &&
     638          54 :                             (!latest_transid ||
     639          54 :                              device->generation > latest_transid)) {
     640         442 :                                 latest_devid = device->devid;
     641         442 :                                 latest_transid = device->generation;
     642         442 :                                 latest_bdev = device->bdev;
     643             :                         }
     644         496 :                         continue; /* devices flagged in_fs_metadata are always kept */
     645             :                 }
     646             : 
     647           0 :                 if (device->devid == BTRFS_DEV_REPLACE_DEVID) {
     648             :                         /*
     649             :                          * In the first step, keep the device which has
     650             :                          * the correct fsid and the devid that is used
     651             :                          * for the dev_replace procedure.
     652             :                          * In the second step, the dev_replace state is
     653             :                          * read from the device tree and it is known
     654             :                          * whether the procedure is really active or
     655             :                          * not, which means whether this device is
     656             :                          * used or whether it should be removed.
     657             :                          */
     658           0 :                         if (step == 0 || device->is_tgtdev_for_dev_replace) {
     659           0 :                                 continue;
     660             :                         }
     661             :                 }
     662           0 :                 if (device->bdev) {
     663           0 :                         blkdev_put(device->bdev, device->mode);
     664           0 :                         device->bdev = NULL;
     665           0 :                         fs_devices->open_devices--;
     666             :                 }
     667           0 :                 if (device->writeable) {
     668           0 :                         list_del_init(&device->dev_alloc_list);
     669           0 :                         device->writeable = 0;
     670           0 :                         if (!device->is_tgtdev_for_dev_replace)
     671           0 :                                 fs_devices->rw_devices--;
     672             :                 }
     673             :                 list_del_init(&device->dev_list);
     674           0 :                 fs_devices->num_devices--;
     675           0 :                 rcu_string_free(device->name);
     676           0 :                 kfree(device);
     677             :         }
     678             : 
     679         442 :         if (fs_devices->seed) {
     680             :                 fs_devices = fs_devices->seed;
     681             :                 goto again; /* repeat the walk on the seed fs_devices; latest_* keep accumulating */
     682             :         }
     683             : 
     684             :         /* NOTE(review): fs_devices was advanced along ->seed above, so these stores land in the last fs_devices of the chain, not necessarily the one passed in - confirm this is intended. */
     684         442 :         fs_devices->latest_bdev = latest_bdev;
     685         442 :         fs_devices->latest_devid = latest_devid;
     686         442 :         fs_devices->latest_trans = latest_transid;
     687             : 
     688         442 :         mutex_unlock(&uuid_mutex);
     689         442 : }
     690             : /* Work handler queued by free_device(): drops the block device reference (if one is held), frees the name string and then the btrfs_device that embeds @work as rcu_work. */
     691         256 : static void __free_device(struct work_struct *work)
     692             : {
     693             :         struct btrfs_device *device;
     694             : 
     695         256 :         device = container_of(work, struct btrfs_device, rcu_work);
     696             : 
     697         256 :         if (device->bdev)
     698         256 :                 blkdev_put(device->bdev, device->mode);
     699             : 
     700         256 :         rcu_string_free(device->name);
     701         256 :         kfree(device);
     702         256 : }
     703             : /* RCU callback (passed to call_rcu() by __btrfs_close_devices()): defers the real teardown of the device embedding @head to __free_device() via schedule_work() - presumably because blkdev_put() may sleep, which an RCU callback must not do; confirm. */
     704         256 : static void free_device(struct rcu_head *head)
     705             : {
     706             :         struct btrfs_device *device;
     707             : 
     708             :         device = container_of(head, struct btrfs_device, rcu);
     709             : 
     710         512 :         INIT_WORK(&device->rcu_work, __free_device);
     711         256 :         schedule_work(&device->rcu_work);
     712         256 : }
     713             : /* Drop one open reference on @fs_devices. When the count reaches zero, every device on the list is replaced (under device_list_mutex) by a fresh btrfs_device with the same devid, uuid and name, the old device is queued for freeing via call_rcu(), and the open/rw/discard/missing counters are wound down. Always returns 0; allocation failures hit BUG_ON. */
     714         223 : static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
     715             : {
     716             :         struct btrfs_device *device;
     717             : 
     718         223 :         if (--fs_devices->opened > 0)
     719             :                 return 0;
     720             : 
     721         221 :         mutex_lock(&fs_devices->device_list_mutex);
     722         469 :         list_for_each_entry(device, &fs_devices->devices, dev_list) {
     723             :                 struct btrfs_device *new_device;
     724             :                 struct rcu_string *name;
     725             : 
     726         248 :                 if (device->bdev)
     727         248 :                         fs_devices->open_devices--;
     728             : 
     729         496 :                 if (device->writeable &&
     730         248 :                     device->devid != BTRFS_DEV_REPLACE_DEVID) {
     731         248 :                         list_del_init(&device->dev_alloc_list);
     732         248 :                         fs_devices->rw_devices--;
     733             :                 }
     734             : 
     735         248 :                 if (device->can_discard)
     736           0 :                         fs_devices->num_can_discard--;
     737         248 :                 if (device->missing)
     738           0 :                         fs_devices->missing_devices--;
     739             : 
     740         248 :                 new_device = btrfs_alloc_device(NULL, &device->devid,
     741         248 :                                                 device->uuid);
     742         248 :                 BUG_ON(IS_ERR(new_device)); /* -ENOMEM */
     743             : 
     744             :                 /* Safe because we are under uuid_mutex */
     745         248 :                 if (device->name) {
     746         248 :                         name = rcu_string_strdup(device->name->str, GFP_NOFS);
     747         248 :                         BUG_ON(!name); /* -ENOMEM */
     748         248 :                         rcu_assign_pointer(new_device->name, name);
     749             :                 }
     750             : 
     751         248 :                 list_replace_rcu(&device->dev_list, &new_device->dev_list);
     752         248 :                 new_device->fs_devices = device->fs_devices;
     753             : 
     754         248 :                 call_rcu(&device->rcu, free_device); /* old device (and its bdev reference) is released later by free_device() */
     755             :         }
     756         221 :         mutex_unlock(&fs_devices->device_list_mutex);
     757             : 
     758         221 :         WARN_ON(fs_devices->open_devices);
     759         221 :         WARN_ON(fs_devices->rw_devices);
     760         221 :         fs_devices->opened = 0;
     761         221 :         fs_devices->seeding = 0;
     762             : 
     763         221 :         return 0;
     764             : }
     765             : /* Close @fs_devices under uuid_mutex. If that left it with no open references, detach its ->seed chain and, after dropping the mutex, close and free each fs_devices on that chain. Calls rcu_barrier() before returning the result of closing @fs_devices itself. */
     766         223 : int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
     767             : {
     768             :         struct btrfs_fs_devices *seed_devices = NULL;
     769             :         int ret;
     770             : 
     771         223 :         mutex_lock(&uuid_mutex);
     772         223 :         ret = __btrfs_close_devices(fs_devices);
     773         223 :         if (!fs_devices->opened) {
     774         221 :                 seed_devices = fs_devices->seed;
     775         221 :                 fs_devices->seed = NULL;
     776             :         }
     777         223 :         mutex_unlock(&uuid_mutex);
     778             : 
     779         446 :         while (seed_devices) {
     780             :                 fs_devices = seed_devices;
     781           0 :                 seed_devices = fs_devices->seed; /* read the next link before this fs_devices is freed */
     782           0 :                 __btrfs_close_devices(fs_devices);
     783           0 :                 free_fs_devices(fs_devices);
     784             :         }
     785             :         /*
     786             :          * Wait for rcu kworkers under __btrfs_close_devices
     787             :          * to finish all blkdev_puts so device is really
     788             :          * free when umount is done.
     789             :          */
     790         223 :         rcu_barrier();
     791         223 :         return ret;
     792             : }
     793             : /* Open, with FMODE_EXCL added to @flags and @holder as holder, every device on @fs_devices that has a name and no bdev yet, and keep it only if the devid and uuid in its on-disk superblock match the in-memory device. Updates per-device state and the fs_devices counters and latest_* fields. Returns 0, or -EINVAL if open_devices is still 0 after the walk. */
     794         221 : static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
     795             :                                 fmode_t flags, void *holder)
     796             : {
     797             :         struct request_queue *q;
     798             :         struct block_device *bdev;
     799         221 :         struct list_head *head = &fs_devices->devices;
     800             :         struct btrfs_device *device;
     801             :         struct block_device *latest_bdev = NULL;
     802             :         struct buffer_head *bh;
     803             :         struct btrfs_super_block *disk_super;
     804             :         u64 latest_devid = 0;
     805             :         u64 latest_transid = 0;
     806             :         u64 devid;
     807             :         int seeding = 1;
     808             :         int ret = 0;
     809             : 
     810         221 :         flags |= FMODE_EXCL;
     811             : 
     812         469 :         list_for_each_entry(device, head, dev_list) {
     813         248 :                 if (device->bdev)
     814           0 :                         continue;
     815         248 :                 if (!device->name)
     816           0 :                         continue;
     817             : 
     818             :                 /* Just open everything we can; ignore failures here */
     819         248 :                 if (btrfs_get_bdev_and_sb(device->name->str, flags, holder, 1,
     820             :                                             &bdev, &bh))
     821           0 :                         continue;
     822             : 
     823         248 :                 disk_super = (struct btrfs_super_block *)bh->b_data;
     824             :                 devid = btrfs_stack_device_id(&disk_super->dev_item);
     825         248 :                 if (devid != device->devid)
     826             :                         goto error_brelse;
     827             : 
     828         248 :                 if (memcmp(device->uuid, disk_super->dev_item.uuid,
     829             :                            BTRFS_UUID_SIZE))
     830             :                         goto error_brelse;
     831             : 
     832         248 :                 device->generation = btrfs_super_generation(disk_super);
     833         248 :                 if (!latest_transid || device->generation > latest_transid) {
     834             :                         latest_devid = devid;
     835             :                         latest_transid = device->generation;
     836         221 :                         latest_bdev = bdev;
     837             :                 }
     838             : 
     839         248 :                 if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING) {
     840           0 :                         device->writeable = 0;
     841             :                 } else {
     842         744 :                         device->writeable = !bdev_read_only(bdev);
     843             :                         seeding = 0; /* a single device without the SEEDING flag makes the whole set non-seeding */
     844             :                 }
     845             : 
     846         248 :                 q = bdev_get_queue(bdev);
     847         248 :                 if (blk_queue_discard(q)) {
     848           0 :                         device->can_discard = 1;
     849           0 :                         fs_devices->num_can_discard++;
     850             :                 }
     851             : 
     852         248 :                 device->bdev = bdev;
     853         248 :                 device->in_fs_metadata = 0;
     854         248 :                 device->mode = flags;
     855             : 
     856         248 :                 if (!blk_queue_nonrot(bdev_get_queue(bdev)))
     857         248 :                         fs_devices->rotating = 1;
     858             : 
     859         248 :                 fs_devices->open_devices++;
     860         496 :                 if (device->writeable &&
     861         248 :                     device->devid != BTRFS_DEV_REPLACE_DEVID) {
     862         248 :                         fs_devices->rw_devices++;
     863         248 :                         list_add(&device->dev_alloc_list,
     864             :                                  &fs_devices->alloc_list);
     865             :                 }
     866         248 :                 brelse(bh);
     867         248 :                 continue;
     868             : 
     869             : error_brelse:
     870             :                 brelse(bh);
     871           0 :                 blkdev_put(bdev, flags); /* superblock did not match this device: release it and move on */
     872           0 :                 continue;
     873             :         }
     874         221 :         if (fs_devices->open_devices == 0) {
     875             :                 ret = -EINVAL;
     876             :                 goto out;
     877             :         }
     878         221 :         fs_devices->seeding = seeding;
     879         221 :         fs_devices->opened = 1;
     880         221 :         fs_devices->latest_bdev = latest_bdev;
     881         221 :         fs_devices->latest_devid = latest_devid;
     882         221 :         fs_devices->latest_trans = latest_transid;
     883         221 :         fs_devices->total_rw_bytes = 0;
     884             : out:
     885         221 :         return ret;
     886             : }
     887             : /* Open @fs_devices under uuid_mutex: if it is already opened only the ->opened count is incremented, otherwise __btrfs_open_devices() does the work. Returns 0, or the error from __btrfs_open_devices(). */
     888         223 : int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
     889             :                        fmode_t flags, void *holder)
     890             : {
     891             :         int ret;
     892             : 
     893         223 :         mutex_lock(&uuid_mutex);
     894         223 :         if (fs_devices->opened) {
     895           2 :                 fs_devices->opened++;
     896             :                 ret = 0;
     897             :         } else {
     898         221 :                 ret = __btrfs_open_devices(fs_devices, flags, holder);
     899             :         }
     900         223 :         mutex_unlock(&uuid_mutex);
     901         223 :         return ret;
     902             : }
     903             : 
     904             : /*
     905             :  * Look for a btrfs signature on the device at @path and, if its primary superblock checks out, register it through device_list_add(). This may be called out of the mount path
     906             :  * and we are not allowed to call set_blocksize during the scan. The superblock
     907             :  * is read via pagecache. Returns 0 on success, the error from blkdev_get_by_path() or device_list_add(), or -EINVAL when no usable superblock is found.
     908             :  */
     909         617 : int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
     910             :                           struct btrfs_fs_devices **fs_devices_ret)
     911             : {
     912             :         struct btrfs_super_block *disk_super;
     913             :         struct block_device *bdev;
     914             :         struct page *page;
     915             :         void *p;
     916             :         int ret = -EINVAL;
     917             :         u64 devid;
     918             :         u64 transid;
     919             :         u64 total_devices;
     920             :         u64 bytenr;
     921             :         pgoff_t index;
     922             : 
     923             :         /*
     924             :          * we would like to check all the supers, but that would make
     925             :          * a btrfs mount succeed after a mkfs from a different FS.
     926             :          * So, we need to add a special mount option to scan for
     927             :          * later supers, using BTRFS_SUPER_MIRROR_MAX instead
     928             :          */
     929             :         bytenr = btrfs_sb_offset(0);
     930         617 :         flags |= FMODE_EXCL;
     931         617 :         mutex_lock(&uuid_mutex);
     932             : 
     933         617 :         bdev = blkdev_get_by_path(path, flags, holder);
     934             : 
     935         617 :         if (IS_ERR(bdev)) {
     936           0 :                 ret = PTR_ERR(bdev);
     937           0 :                 goto error;
     938             :         }
     939             : 
     940             :         /* make sure our super fits in the device */
     941         617 :         if (bytenr + PAGE_CACHE_SIZE >= i_size_read(bdev->bd_inode))
     942             :                 goto error_bdev_put;
     943             : 
     944             :         /* make sure our super fits in the page */
     945             :         if (sizeof(*disk_super) > PAGE_CACHE_SIZE)
     946             :                 goto error_bdev_put;
     947             : 
     948             :         /* make sure our super doesn't straddle pages on disk */
     949             :         index = bytenr >> PAGE_CACHE_SHIFT;
     950             :         if ((bytenr + sizeof(*disk_super) - 1) >> PAGE_CACHE_SHIFT != index)
     951             :                 goto error_bdev_put;
     952             : 
     953             :         /* pull in the page with our super */
     954         617 :         page = read_cache_page_gfp(bdev->bd_inode->i_mapping,
     955             :                                    index, GFP_NOFS);
     956             : 
     957         617 :         if (IS_ERR_OR_NULL(page))
     958             :                 goto error_bdev_put;
     959             : 
     960             :         p = kmap(page);
     961             : 
     962             :         /* align our pointer to the offset of the super block */
     963             :         disk_super = p + (bytenr & ~PAGE_CACHE_MASK);
     964             : 
     965        1230 :         if (btrfs_super_bytenr(disk_super) != bytenr ||
     966             :             btrfs_super_magic(disk_super) != BTRFS_MAGIC)
     967             :                 goto error_unmap;
     968             : 
     969             :         devid = btrfs_stack_device_id(&disk_super->dev_item);
     970             :         transid = btrfs_super_generation(disk_super);
     971             :         total_devices = btrfs_super_num_devices(disk_super);
     972             : 
     973         613 :         ret = device_list_add(path, disk_super, devid, fs_devices_ret);
     974         613 :         if (ret > 0) { /* a positive return is only used to trigger the log message below, then folded to 0 */
     975         126 :                 if (disk_super->label[0]) {
     976           0 :                         if (disk_super->label[BTRFS_LABEL_SIZE - 1])
     977           0 :                                 disk_super->label[BTRFS_LABEL_SIZE - 1] = '\0'; /* force NUL termination before %s; this writes into the mapped page */
     978           0 :                         printk(KERN_INFO "BTRFS: device label %s ", disk_super->label);
     979             :                 } else {
     980         126 :                         printk(KERN_INFO "BTRFS: device fsid %pU ", disk_super->fsid);
     981             :                 }
     982             : 
     983         126 :                 printk(KERN_CONT "devid %llu transid %llu %s\n", devid, transid, path);
     984             :                 ret = 0;
     985             :         }
     986         613 :         if (!ret && fs_devices_ret)
     987         613 :                 (*fs_devices_ret)->total_devices = total_devices;
     988             : 
     989             : error_unmap:
     990             :         kunmap(page);
     991         617 :         page_cache_release(page);
     992             : 
     993             : error_bdev_put:
     994         617 :         blkdev_put(bdev, flags); /* the device is only held open for the duration of the scan */
     995             : error:
     996         617 :         mutex_unlock(&uuid_mutex);
     997         617 :         return ret;
     998             : }
     999             : 
    1000             : /* helper to account the used device space in the range: stores in *length the number of bytes of [start, end] (end treated as inclusive) covered by dev extents of @device. *length is 0 when start is at or past device->total_bytes or the device is a dev-replace target. Returns 0, -ENOMEM, or a negative error from the tree search helpers. */
    1001       48490 : int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
    1002             :                                    u64 end, u64 *length)
    1003             : {
    1004             :         struct btrfs_key key;
    1005       48490 :         struct btrfs_root *root = device->dev_root;
    1006             :         struct btrfs_dev_extent *dev_extent;
    1007             :         struct btrfs_path *path;
    1008             :         u64 extent_end;
    1009             :         int ret;
    1010             :         int slot;
    1011       48510 :         struct extent_buffer *l;
    1012             : 
    1013       48490 :         *length = 0;
    1014             : 
    1015       48490 :         if (start >= device->total_bytes || device->is_tgtdev_for_dev_replace)
    1016             :                 return 0;
    1017             : 
    1018       48490 :         path = btrfs_alloc_path();
    1019       48490 :         if (!path)
    1020             :                 return -ENOMEM;
    1021       48490 :         path->reada = 2;
    1022             : 
    1023       48490 :         key.objectid = device->devid;
    1024       48490 :         key.offset = start;
    1025       48490 :         key.type = BTRFS_DEV_EXTENT_KEY;
    1026             : 
    1027       48490 :         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
    1028       48490 :         if (ret < 0)
    1029             :                 goto out;
    1030       48490 :         if (ret > 0) { /* no exact key match: step back so an extent beginning before @start is examined too */
    1031          20 :                 ret = btrfs_previous_item(root, path, key.objectid, key.type);
    1032          20 :                 if (ret < 0)
    1033             :                         goto out;
    1034             :         }
    1035             : 
    1036             :         while (1) {
    1037       48510 :                 l = path->nodes[0];
    1038       48510 :                 slot = path->slots[0];
    1039       97020 :                 if (slot >= btrfs_header_nritems(l)) {
    1040           0 :                         ret = btrfs_next_leaf(root, path);
    1041           0 :                         if (ret == 0)
    1042           0 :                                 continue;
    1043           0 :                         if (ret < 0)
    1044             :                                 goto out;
    1045             : 
    1046             :                         break; /* positive return from btrfs_next_leaf(): stop walking */
    1047             :                 }
    1048       48510 :                 btrfs_item_key_to_cpu(l, &key, slot);
    1049             : 
    1050       48510 :                 if (key.objectid < device->devid)
    1051             :                         goto next;
    1052             : 
    1053       48490 :                 if (key.objectid > device->devid)
    1054             :                         break;
    1055             : 
    1056       48490 :                 if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY)
    1057             :                         goto next;
    1058             : 
    1059       48490 :                 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
    1060       96980 :                 extent_end = key.offset + btrfs_dev_extent_length(l,
    1061             :                                                                   dev_extent);
    1062       48490 :                 if (key.offset <= start && extent_end > end) { /* extent covers the whole range */
    1063       48470 :                         *length = end - start + 1;
    1064       48470 :                         break;
    1065          20 :                 } else if (key.offset <= start && extent_end > start) /* extent overlaps the front of the range */
    1066           0 :                         *length += extent_end - start;
    1067          20 :                 else if (key.offset > start && extent_end <= end) /* extent lies inside the range */
    1068           0 :                         *length += extent_end - key.offset;
    1069          20 :                 else if (key.offset > start && key.offset <= end) { /* extent overlaps the tail of the range */
    1070           0 :                         *length += end - key.offset + 1;
    1071           0 :                         break;
    1072          20 :                 } else if (key.offset > end) /* extent starts past the range: done */
    1073             :                         break;
    1074             : 
    1075             : next:
    1076          20 :                 path->slots[0]++;
    1077             :         }
    1078             :         ret = 0;
    1079             : out:
    1080       48490 :         btrfs_free_path(path);
    1081       48490 :         return ret;
    1082             : }
    1083             : /* Check whether the range [*start, *start + len) on @device overlaps a stripe of any chunk on the pending_chunks list of @trans's transaction. On each overlap *start is moved to the end of that stripe, and later comparisons use the updated value. Returns 1 if at least one overlap was found, 0 otherwise. */
    1084         185 : static int contains_pending_extent(struct btrfs_trans_handle *trans,
    1085             :                                    struct btrfs_device *device,
    1086             :                                    u64 *start, u64 len)
    1087             : {
    1088             :         struct extent_map *em;
    1089             :         int ret = 0;
    1090             : 
    1091         191 :         list_for_each_entry(em, &trans->transaction->pending_chunks, list) {
    1092             :                 struct map_lookup *map;
    1093             :                 int i;
    1094             : 
    1095           6 :                 map = (struct map_lookup *)em->bdev; /* the em's bdev field is read here as a map_lookup pointer */
    1096          10 :                 for (i = 0; i < map->num_stripes; i++) {
    1097          10 :                         if (map->stripes[i].dev != device)
    1098           4 :                                 continue;
    1099          12 :                         if (map->stripes[i].physical >= *start + len ||
    1100           6 :                             map->stripes[i].physical + em->orig_block_len <=
    1101             :                             *start)
    1102           3 :                                 continue; /* stripe lies entirely after or before the range */
    1103           3 :                         *start = map->stripes[i].physical +
    1104             :                                 em->orig_block_len;
    1105             :                         ret = 1;
    1106             :                 }
    1107             :         }
    1108             : 
    1109         185 :         return ret;
    1110             : }
    1111             : 
    1112             : 
    1113             : /*
    1114             :  * find_free_dev_extent - find free space in the specified device
                     :  * @trans:      transaction handle; chunks on its pending list are treated
                     :  *              as allocated (see contains_pending_extent())
    1115             :  * @device:     the device which we search the free space in
    1116             :  * @num_bytes:  the size of the free space that we need
    1117             :  * @start:      store the start of the free space.
    1118             :  * @len:        the size of the free space that we find, or the size of the max
    1119             :  *              free space if we don't find suitable free space; may be NULL
    1120             :  *
    1121             :  * this uses a pretty simple search, the expectation is that it is
    1122             :  * called very infrequently and that a given device has a small number
    1123             :  * of extents
    1124             :  *
    1125             :  * @start is used to store the start of the free space if we find. But if we
    1126             :  * don't find suitable free space, it will be used to store the start position
    1127             :  * of the max free space.
    1128             :  *
    1129             :  * @len is used to store the size of the free space that we find.
    1130             :  * But if we don't find suitable free space, it is used to store the size of
    1131             :  * the max free space.
                     :  *
                     :  * Returns 0 when a hole of at least @num_bytes was found, -ENOSPC when
                     :  * there is none (also when the search window is empty or the device is
                     :  * the target of a device replace), -ENOMEM when the path cannot be
                     :  * allocated (in that case @start and @len are not written), or a
                     :  * negative error passed through from the tree search helpers.
    1132             :  */
    1133         149 : int find_free_dev_extent(struct btrfs_trans_handle *trans,
    1134             :                          struct btrfs_device *device, u64 num_bytes,
    1135             :                          u64 *start, u64 *len)
    1136             : {
    1137             :         struct btrfs_key key;
    1138         149 :         struct btrfs_root *root = device->dev_root;
    1139             :         struct btrfs_dev_extent *dev_extent;
    1140             :         struct btrfs_path *path;
    1141             :         u64 hole_size;
    1142             :         u64 max_hole_start;
    1143             :         u64 max_hole_size;
    1144             :         u64 extent_end;
    1145             :         u64 search_start;
    1146         149 :         u64 search_end = device->total_bytes;
    1147             :         int ret;
    1148             :         int slot;
    1149         742 :         struct extent_buffer *l;
    1150             : 
    1151             :         /* FIXME use last free of some kind */
    1152             : 
    1153             :         /* we don't want to overwrite the superblock on the drive,
    1154             :          * so we make sure to start at an offset of at least 1MB
    1155             :          */
    1156         149 :         search_start = max(root->fs_info->alloc_start, 1024ull * 1024);
    1157             : 
    1158         149 :         path = btrfs_alloc_path();
    1159         149 :         if (!path)
    1160             :                 return -ENOMEM;
    1161             : again:  /* re-entered with an advanced search_start when the tail hole hits a pending chunk */
    1162         152 :         max_hole_start = search_start;
    1163             :         max_hole_size = 0;
    1164             :         hole_size = 0;
    1165             : 
    1166         152 :         if (search_start >= search_end || device->is_tgtdev_for_dev_replace) {
    1167             :                 ret = -ENOSPC;
    1168             :                 goto out;
    1169             :         }
    1170             : 
    1171         152 :         path->reada = 2;
    1172         152 :         path->search_commit_root = 1;
    1173         152 :         path->skip_locking = 1;
    1174             : 
    1175         152 :         key.objectid = device->devid;
    1176         152 :         key.offset = search_start;
    1177         152 :         key.type = BTRFS_DEV_EXTENT_KEY;
    1178             : 
    1179         152 :         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
    1180         152 :         if (ret < 0)
    1181             :                 goto out;
    1182         152 :         if (ret > 0) { /* no exact match: presumably step back so an extent straddling search_start is seen — confirm */
    1183          80 :                 ret = btrfs_previous_item(root, path, key.objectid, key.type);
    1184          80 :                 if (ret < 0)
    1185             :                         goto out;
    1186             :         }
    1187             : 
    1188             :         while (1) {
    1189         742 :                 l = path->nodes[0];
    1190         742 :                 slot = path->slots[0];
    1191        1484 :                 if (slot >= btrfs_header_nritems(l)) {
    1192          55 :                         ret = btrfs_next_leaf(root, path);
    1193          55 :                         if (ret == 0)
    1194           0 :                                 continue;
    1195          55 :                         if (ret < 0)
    1196             :                                 goto out;
    1197             : 
    1198             :                         break; /* ret > 0: no further leaf, fall through to the tail-hole check */
    1199             :                 }
    1200         687 :                 btrfs_item_key_to_cpu(l, &key, slot);
    1201             : 
    1202         687 :                 if (key.objectid < device->devid)
    1203             :                         goto next;
    1204             : 
    1205         643 :                 if (key.objectid > device->devid)
    1206             :                         break;
    1207             : 
    1208         640 :                 if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY)
    1209             :                         goto next;
    1210             : 
    1211         640 :                 if (key.offset > search_start) { /* gap between search_start and this extent */
    1212         127 :                         hole_size = key.offset - search_start;
    1213             : 
    1214             :                         /*
    1215             :                          * Have to check before we set max_hole_start, otherwise
    1216             :                          * we could end up sending back this offset anyway.
    1217             :                          */
    1218         127 :                         if (contains_pending_extent(trans, device,
    1219             :                                                     &search_start,
    1220             :                                                     hole_size))
    1221             :                                 hole_size = 0; /* whole gap is discarded; search_start was advanced by the callee */
    1222             : 
    1223         127 :                         if (hole_size > max_hole_size) {
    1224         125 :                                 max_hole_start = search_start;
    1225             :                                 max_hole_size = hole_size;
    1226             :                         }
    1227             : 
    1228             :                         /*
    1229             :                          * If this free space is greater than which we need,
    1230             :                          * it must be the max free space that we have found
    1231             :                          * until now, so max_hole_start must point to the start
    1232             :                          * of this free space and the length of this free space
    1233             :                          * is stored in max_hole_size. Thus, we return
    1234             :                          * max_hole_start and max_hole_size and go back to the
    1235             :                          * caller.
    1236             :                          */
    1237         127 :                         if (hole_size >= num_bytes) {
    1238             :                                 ret = 0;
    1239             :                                 goto out;
    1240             :                         }
    1241             :                 }
    1242             : 
    1243         546 :                 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
    1244        1092 :                 extent_end = key.offset + btrfs_dev_extent_length(l,
    1245             :                                                                   dev_extent);
    1246         546 :                 if (extent_end > search_start)
    1247         543 :                         search_start = extent_end;
    1248             : next:
    1249         590 :                 path->slots[0]++;
    1250         590 :                 cond_resched();
    1251             :         }
    1252             : 
    1253             :         /*
    1254             :          * At this point, search_start should be the end of
    1255             :          * allocated dev extents, and when shrinking the device,
    1256             :          * search_end may be smaller than search_start.
    1257             :          */
    1258          58 :         if (search_end > search_start) /* otherwise hole_size keeps its value from the loop above */
    1259          58 :                 hole_size = search_end - search_start;
    1260             : 
    1261          58 :         if (hole_size > max_hole_size) {
    1262             :                 max_hole_start = search_start;
    1263             :                 max_hole_size = hole_size;
    1264             :         }
    1265             : 
    1266          58 :         if (contains_pending_extent(trans, device, &search_start, hole_size)) {
    1267           3 :                 btrfs_release_path(path);
    1268           3 :                 goto again; /* restart the scan; max_hole_* are reset at the label */
    1269             :         }
    1270             : 
    1271             :         /* See above. */
    1272          55 :         if (hole_size < num_bytes)
    1273             :                 ret = -ENOSPC;
    1274             :         else
    1275             :                 ret = 0;
    1276             : 
    1277             : out:
    1278         149 :         btrfs_free_path(path);
    1279         149 :         *start = max_hole_start; /* written on every path through "out", including errors */
    1280         149 :         if (len)
    1281          89 :                 *len = max_hole_size;
    1282         149 :         return ret;
    1283             : }
    1284             : 
    1285         116 : static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
    1286             :                           struct btrfs_device *device,
    1287             :                           u64 start)
    1288             : {       /*
                     :          * Delete the dev extent item of @device that starts at, or covers,
                     :          * byte offset @start, and give its length back to the device and
                     :          * free-chunk-space accounting.
                     :          *
                     :          * Returns 0 on success, -ENOMEM if no path could be allocated, or
                     :          * the non-zero result of the failing search/previous-item/delete
                     :          * call.
                     :          */
    1289             :         int ret;
    1290             :         struct btrfs_path *path;
    1291         116 :         struct btrfs_root *root = device->dev_root;
    1292             :         struct btrfs_key key;
    1293             :         struct btrfs_key found_key;
    1294             :         struct extent_buffer *leaf = NULL;
    1295             :         struct btrfs_dev_extent *extent = NULL;
    1296             : 
    1297         116 :         path = btrfs_alloc_path();
    1298         116 :         if (!path)
    1299             :                 return -ENOMEM;
    1300             : 
    1301         116 :         key.objectid = device->devid;
    1302         116 :         key.offset = start;
    1303         116 :         key.type = BTRFS_DEV_EXTENT_KEY;
    1304             : again:
    1305         116 :         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
    1306         116 :         if (ret > 0) { /* no item starts exactly at @start: look at the preceding dev extent */
    1307           0 :                 ret = btrfs_previous_item(root, path, key.objectid,
    1308             :                                           BTRFS_DEV_EXTENT_KEY);
    1309           0 :                 if (ret)
    1310             :                         goto out;
    1311           0 :                 leaf = path->nodes[0];
    1312           0 :                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
    1313           0 :                 extent = btrfs_item_ptr(leaf, path->slots[0],
    1314             :                                         struct btrfs_dev_extent);
    1315           0 :                 BUG_ON(found_key.offset > start || found_key.offset + /* the previous extent must cover @start */
    1316             :                        btrfs_dev_extent_length(leaf, extent) < start);
    1317           0 :                 key = found_key; /* retry the search with the exact key of the covering extent */
    1318           0 :                 btrfs_release_path(path);
    1319           0 :                 goto again;
    1320         116 :         } else if (ret == 0) {
    1321         116 :                 leaf = path->nodes[0];
    1322         232 :                 extent = btrfs_item_ptr(leaf, path->slots[0],
    1323             :                                         struct btrfs_dev_extent);
    1324             :         } else {
    1325           0 :                 btrfs_error(root->fs_info, ret, "Slot search failed");
    1326           0 :                 goto out;
    1327             :         }
    1328             : 
    1329         116 :         if (device->bytes_used > 0) { /* accounting is skipped entirely when bytes_used is already 0 */
    1330             :                 u64 len = btrfs_dev_extent_length(leaf, extent);
    1331         116 :                 device->bytes_used -= len;
    1332         116 :                 spin_lock(&root->fs_info->free_chunk_lock);
    1333         116 :                 root->fs_info->free_chunk_space += len;
    1334         116 :                 spin_unlock(&root->fs_info->free_chunk_lock);
    1335             :         }
    1336             :         ret = btrfs_del_item(trans, root, path);
    1337         116 :         if (ret) {
    1338           0 :                 btrfs_error(root->fs_info, ret,
    1339             :                             "Failed to remove dev extent item");
    1340             :         }
    1341             : out:
    1342         116 :         btrfs_free_path(path);
    1343         116 :         return ret;
    1344             : }
    1345             : 
    1346         133 : static int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
    1347             :                                   struct btrfs_device *device,
    1348             :                                   u64 chunk_tree, u64 chunk_objectid,
    1349             :                                   u64 chunk_offset, u64 start, u64 num_bytes)
    1350             : {       /*
                     :          * Insert a new dev extent item for @device keyed by
                     :          * (devid, BTRFS_DEV_EXTENT_KEY, @start) and fill it with the owning
                     :          * chunk's tree, objectid and offset, the chunk tree uuid and the
                     :          * extent length @num_bytes.
                     :          *
                     :          * Returns 0 on success, -ENOMEM if no path could be allocated, or
                     :          * the error from btrfs_insert_empty_item().
                     :          */
    1351             :         int ret;
    1352             :         struct btrfs_path *path;
    1353         133 :         struct btrfs_root *root = device->dev_root;
    1354             :         struct btrfs_dev_extent *extent;
    1355             :         struct extent_buffer *leaf;
    1356             :         struct btrfs_key key;
    1357             : 
    1358         133 :         WARN_ON(!device->in_fs_metadata); /* warn only; the allocation still proceeds */
    1359         133 :         WARN_ON(device->is_tgtdev_for_dev_replace);
    1360         133 :         path = btrfs_alloc_path();
    1361         133 :         if (!path)
    1362             :                 return -ENOMEM;
    1363             : 
    1364         133 :         key.objectid = device->devid;
    1365         133 :         key.offset = start;
    1366         133 :         key.type = BTRFS_DEV_EXTENT_KEY;
    1367             :         ret = btrfs_insert_empty_item(trans, root, path, &key,
    1368             :                                       sizeof(*extent));
    1369         133 :         if (ret)
    1370             :                 goto out;
    1371             : 
    1372         133 :         leaf = path->nodes[0];
    1373         266 :         extent = btrfs_item_ptr(leaf, path->slots[0],
    1374             :                                 struct btrfs_dev_extent);
    1375             :         btrfs_set_dev_extent_chunk_tree(leaf, extent, chunk_tree);
    1376             :         btrfs_set_dev_extent_chunk_objectid(leaf, extent, chunk_objectid);
    1377             :         btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);
    1378             : 
    1379         133 :         write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid, /* copy the fs-wide chunk tree uuid into the item */
    1380             :                     btrfs_dev_extent_chunk_tree_uuid(extent), BTRFS_UUID_SIZE);
    1381             : 
    1382             :         btrfs_set_dev_extent_length(leaf, extent, num_bytes);
    1383         133 :         btrfs_mark_buffer_dirty(leaf);
    1384             : out:
    1385         133 :         btrfs_free_path(path);
    1386         133 :         return ret;
    1387             : }
    1388             : 
    1389          87 : static u64 find_next_chunk(struct btrfs_fs_info *fs_info)
    1390             : {       /*
                     :          * Return the end offset (start + len) of the last extent map in
                     :          * the fs_info mapping tree, or 0 if the tree is empty. The tree is
                     :          * read under its read lock.
                     :          */
    1391             :         struct extent_map_tree *em_tree;
    1392             :         struct extent_map *em;
    1393             :         struct rb_node *n;
    1394             :         u64 ret = 0;
    1395             : 
    1396             :         em_tree = &fs_info->mapping_tree.map_tree;
    1397          87 :         read_lock(&em_tree->lock);
    1398          87 :         n = rb_last(&em_tree->map); /* rightmost node of the tree, NULL when empty */
    1399          87 :         if (n) {
    1400             :                 em = rb_entry(n, struct extent_map, rb_node);
    1401          87 :                 ret = em->start + em->len;
    1402             :         }
    1403             :         read_unlock(&em_tree->lock);
    1404             : 
    1405          87 :         return ret;
    1406             : }
    1407             : 
    1408           0 : static noinline int find_next_devid(struct btrfs_fs_info *fs_info,
    1409             :                                     u64 *devid_ret)
    1410             : {       /*
                     :          * Compute the next device id: one more than the key offset of the
                     :          * last BTRFS_DEV_ITEM_KEY item under BTRFS_DEV_ITEMS_OBJECTID in
                     :          * the chunk root, or 1 when no such item is found.
                     :          *
                     :          * Returns 0 with *devid_ret set, -ENOMEM if no path could be
                     :          * allocated, or the negative error from btrfs_search_slot().
                     :          */
    1411             :         int ret;
    1412             :         struct btrfs_key key;
    1413             :         struct btrfs_key found_key;
    1414             :         struct btrfs_path *path;
    1415             : 
    1416           0 :         path = btrfs_alloc_path();
    1417           0 :         if (!path)
    1418             :                 return -ENOMEM;
    1419             : 
    1420           0 :         key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
    1421           0 :         key.type = BTRFS_DEV_ITEM_KEY;
    1422           0 :         key.offset = (u64)-1; /* largest possible offset: search lands past the last dev item */
    1423             : 
    1424           0 :         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
    1425           0 :         if (ret < 0)
    1426             :                 goto error;
    1427             : 
    1428           0 :         BUG_ON(ret == 0); /* Corruption */
    1429             : 
    1430           0 :         ret = btrfs_previous_item(fs_info->chunk_root, path,
    1431             :                                   BTRFS_DEV_ITEMS_OBJECTID,
    1432             :                                   BTRFS_DEV_ITEM_KEY);
    1433           0 :         if (ret) { /* NOTE(review): any non-zero result, negative errors included, is treated as "no dev item" — confirm intended */
    1434           0 :                 *devid_ret = 1;
    1435             :         } else {
    1436           0 :                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
    1437             :                                       path->slots[0]);
    1438           0 :                 *devid_ret = found_key.offset + 1;
    1439             :         }
    1440             :         ret = 0;
    1441             : error:
    1442           0 :         btrfs_free_path(path);
    1443             :         return ret;
    1444             : }
    1445             : 
    1446             : /*
    1447             :  * Insert the dev item describing @device into the chunk root.
    1448             :  * The btrfs_device struct should be fully filled in by the caller.
                     :  *
                     :  * Returns 0 on success, -ENOMEM if no path could be allocated, or the
                     :  * error from btrfs_insert_empty_item().
    1449             :  */
    1450           0 : static int btrfs_add_device(struct btrfs_trans_handle *trans,
    1451             :                             struct btrfs_root *root,
    1452             :                             struct btrfs_device *device)
    1453             : {
    1454             :         int ret;
    1455             :         struct btrfs_path *path;
    1456             :         struct btrfs_dev_item *dev_item;
    1457             :         struct extent_buffer *leaf;
    1458             :         struct btrfs_key key;
    1459             :         unsigned long ptr;
    1460             : 
    1461           0 :         root = root->fs_info->chunk_root; /* always operate on the chunk root, whatever root was passed in */
    1462             : 
    1463           0 :         path = btrfs_alloc_path();
    1464           0 :         if (!path)
    1465             :                 return -ENOMEM;
    1466             : 
    1467           0 :         key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
    1468           0 :         key.type = BTRFS_DEV_ITEM_KEY;
    1469           0 :         key.offset = device->devid;
    1470             : 
    1471             :         ret = btrfs_insert_empty_item(trans, root, path, &key,
    1472             :                                       sizeof(*dev_item));
    1473           0 :         if (ret)
    1474             :                 goto out;
    1475             : 
    1476           0 :         leaf = path->nodes[0];
    1477           0 :         dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
    1478             : 
    1479           0 :         btrfs_set_device_id(leaf, dev_item, device->devid);
    1480             :         btrfs_set_device_generation(leaf, dev_item, 0);
    1481           0 :         btrfs_set_device_type(leaf, dev_item, device->type);
    1482           0 :         btrfs_set_device_io_align(leaf, dev_item, device->io_align);
    1483           0 :         btrfs_set_device_io_width(leaf, dev_item, device->io_width);
    1484           0 :         btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
    1485           0 :         btrfs_set_device_total_bytes(leaf, dev_item, device->disk_total_bytes); /* on-disk total comes from disk_total_bytes */
    1486           0 :         btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
    1487             :         btrfs_set_device_group(leaf, dev_item, 0); /* group, seek_speed, bandwidth and start_offset are written as 0 */
    1488             :         btrfs_set_device_seek_speed(leaf, dev_item, 0);
    1489             :         btrfs_set_device_bandwidth(leaf, dev_item, 0);
    1490             :         btrfs_set_device_start_offset(leaf, dev_item, 0);
    1491             : 
    1492             :         ptr = btrfs_device_uuid(dev_item);
    1493           0 :         write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
    1494             :         ptr = btrfs_device_fsid(dev_item);
    1495           0 :         write_extent_buffer(leaf, root->fs_info->fsid, ptr, BTRFS_UUID_SIZE);
    1496           0 :         btrfs_mark_buffer_dirty(leaf);
    1497             : 
    1498             :         ret = 0;
    1499             : out:
    1500           0 :         btrfs_free_path(path);
    1501             :         return ret;
    1502             : }
    1503             : 
    1504             : /*
    1505             :  * Function to update ctime/mtime for a given device path.
    1506             :  * Mainly used for ctime/mtime based probe like libblkid.
                     :  *
                     :  * @path_name: path of the device node to touch.
                     :  *
                     :  * Best effort: if the path cannot be opened nothing is updated and no
                     :  * error is reported to the caller.
    1507             :  */
    1508           0 : static void update_dev_time(char *path_name)
    1509             : {
    1510             :         struct file *filp;
    1511             : 
    1512           0 :         filp = filp_open(path_name, O_RDWR, 0);
    1513           0 :         if (IS_ERR(filp)) /* filp_open() reports failure as ERR_PTR(), never as NULL */
    1514             :                 return;
    1515           0 :         file_update_time(filp);
    1516           0 :         filp_close(filp, NULL);
    1517           0 :         return;
    1518             : }
    1519             : 
    1520           0 : static int btrfs_rm_dev_item(struct btrfs_root *root,
    1521             :                              struct btrfs_device *device)
    1522             : {       /*
                     :          * Delete the dev item of @device from the chunk root inside a
                     :          * transaction started here, holding the chunk lock around the
                     :          * search and delete.
                     :          *
                     :          * Returns 0 on success, -ENOMEM if no path could be allocated, the
                     :          * PTR_ERR() of a failed btrfs_start_transaction(), -ENOENT when no
                     :          * item with this devid exists, or the error from the search/delete.
                     :          *
                     :          * NOTE(review): the transaction is committed on the error paths as
                     :          * well, and the result of btrfs_commit_transaction() is not
                     :          * propagated — confirm this is intended.
                     :          */
    1523             :         int ret;
    1524             :         struct btrfs_path *path;
    1525             :         struct btrfs_key key;
    1526             :         struct btrfs_trans_handle *trans;
    1527             : 
    1528           0 :         root = root->fs_info->chunk_root; /* always operate on the chunk root, whatever root was passed in */
    1529             : 
    1530           0 :         path = btrfs_alloc_path();
    1531           0 :         if (!path)
    1532             :                 return -ENOMEM;
    1533             : 
    1534           0 :         trans = btrfs_start_transaction(root, 0);
    1535           0 :         if (IS_ERR(trans)) {
    1536           0 :                 btrfs_free_path(path);
    1537           0 :                 return PTR_ERR(trans);
    1538             :         }
    1539           0 :         key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
    1540           0 :         key.type = BTRFS_DEV_ITEM_KEY;
    1541           0 :         key.offset = device->devid;
    1542             :         lock_chunks(root);
    1543             : 
    1544           0 :         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
    1545           0 :         if (ret < 0)
    1546             :                 goto out;
    1547             : 
    1548           0 :         if (ret > 0) { /* no exact match for this devid */
    1549             :                 ret = -ENOENT;
    1550             :                 goto out;
    1551             :         }
    1552             : 
    1553             :         ret = btrfs_del_item(trans, root, path);
    1554             :         if (ret) /* redundant: control reaches "out" either way */
    1555             :                 goto out;
    1556             : out:
    1557           0 :         btrfs_free_path(path);
    1558             :         unlock_chunks(root);
    1559           0 :         btrfs_commit_transaction(trans, root);
    1560             :         return ret;
    1561             : }
    1562             : 
    1563           0 : int btrfs_rm_device(struct btrfs_root *root, char *device_path)
    1564             : {
    1565             :         struct btrfs_device *device;
    1566             :         struct btrfs_device *next_device;
    1567             :         struct block_device *bdev;
    1568           0 :         struct buffer_head *bh = NULL;
    1569             :         struct btrfs_super_block *disk_super;
    1570             :         struct btrfs_fs_devices *cur_devices;
    1571             :         u64 all_avail;
    1572             :         u64 devid;
    1573             :         u64 num_devices;
    1574             :         u8 *dev_uuid;
    1575             :         unsigned seq;
    1576             :         int ret = 0;
    1577             :         bool clear_super = false;
    1578             : 
    1579           0 :         mutex_lock(&uuid_mutex);
    1580             : 
    1581             :         do {
    1582           0 :                 seq = read_seqbegin(&root->fs_info->profiles_lock);
    1583             : 
    1584           0 :                 all_avail = root->fs_info->avail_data_alloc_bits |
    1585           0 :                             root->fs_info->avail_system_alloc_bits |
    1586           0 :                             root->fs_info->avail_metadata_alloc_bits;
    1587           0 :         } while (read_seqretry(&root->fs_info->profiles_lock, seq));
    1588             : 
    1589           0 :         num_devices = root->fs_info->fs_devices->num_devices;
    1590           0 :         btrfs_dev_replace_lock(&root->fs_info->dev_replace);
    1591           0 :         if (btrfs_dev_replace_is_ongoing(&root->fs_info->dev_replace)) {
    1592           0 :                 WARN_ON(num_devices < 1);
    1593           0 :                 num_devices--;
    1594             :         }
    1595           0 :         btrfs_dev_replace_unlock(&root->fs_info->dev_replace);
    1596             : 
    1597           0 :         if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && num_devices <= 4) {
    1598             :                 ret = BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET;
    1599             :                 goto out;
    1600             :         }
    1601             : 
    1602           0 :         if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) && num_devices <= 2) {
    1603             :                 ret = BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET;
    1604             :                 goto out;
    1605             :         }
    1606             : 
    1607           0 :         if ((all_avail & BTRFS_BLOCK_GROUP_RAID5) &&
    1608           0 :             root->fs_info->fs_devices->rw_devices <= 2) {
    1609             :                 ret = BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET;
    1610             :                 goto out;
    1611             :         }
    1612           0 :         if ((all_avail & BTRFS_BLOCK_GROUP_RAID6) &&
    1613           0 :             root->fs_info->fs_devices->rw_devices <= 3) {
    1614             :                 ret = BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET;
    1615             :                 goto out;
    1616             :         }
    1617             : 
    1618           0 :         if (strcmp(device_path, "missing") == 0) {
    1619             :                 struct list_head *devices;
    1620             :                 struct btrfs_device *tmp;
    1621             : 
    1622             :                 device = NULL;
    1623           0 :                 devices = &root->fs_info->fs_devices->devices;
    1624             :                 /*
    1625             :                  * It is safe to read the devices since the volume_mutex
    1626             :                  * is held.
    1627             :                  */
    1628           0 :                 list_for_each_entry(tmp, devices, dev_list) {
    1629           0 :                         if (tmp->in_fs_metadata &&
    1630           0 :                             !tmp->is_tgtdev_for_dev_replace &&
    1631           0 :                             !tmp->bdev) {
    1632             :                                 device = tmp;
    1633             :                                 break;
    1634             :                         }
    1635             :                 }
    1636           0 :                 bdev = NULL;
    1637           0 :                 bh = NULL;
    1638             :                 disk_super = NULL;
    1639           0 :                 if (!device) {
    1640             :                         ret = BTRFS_ERROR_DEV_MISSING_NOT_FOUND;
    1641             :                         goto out;
    1642             :                 }
    1643             :         } else {
    1644           0 :                 ret = btrfs_get_bdev_and_sb(device_path,
    1645             :                                             FMODE_WRITE | FMODE_EXCL,
    1646           0 :                                             root->fs_info->bdev_holder, 0,
    1647             :                                             &bdev, &bh);
    1648           0 :                 if (ret)
    1649             :                         goto out;
    1650           0 :                 disk_super = (struct btrfs_super_block *)bh->b_data;
    1651             :                 devid = btrfs_stack_device_id(&disk_super->dev_item);
    1652           0 :                 dev_uuid = disk_super->dev_item.uuid;
    1653           0 :                 device = btrfs_find_device(root->fs_info, devid, dev_uuid,
    1654           0 :                                            disk_super->fsid);
    1655           0 :                 if (!device) {
    1656             :                         ret = -ENOENT;
    1657             :                         goto error_brelse;
    1658             :                 }
    1659             :         }
    1660             : 
    1661           0 :         if (device->is_tgtdev_for_dev_replace) {
    1662             :                 ret = BTRFS_ERROR_DEV_TGT_REPLACE;
    1663             :                 goto error_brelse;
    1664             :         }
    1665             : 
    1666           0 :         if (device->writeable && root->fs_info->fs_devices->rw_devices == 1) {
    1667             :                 ret = BTRFS_ERROR_DEV_ONLY_WRITABLE;
    1668             :                 goto error_brelse;
    1669             :         }
    1670             : 
    1671           0 :         if (device->writeable) {
    1672             :                 lock_chunks(root);
    1673           0 :                 list_del_init(&device->dev_alloc_list);
    1674             :                 unlock_chunks(root);
    1675           0 :                 root->fs_info->fs_devices->rw_devices--;
    1676             :                 clear_super = true;
    1677             :         }
    1678             : 
    1679           0 :         mutex_unlock(&uuid_mutex);
    1680           0 :         ret = btrfs_shrink_device(device, 0);
    1681           0 :         mutex_lock(&uuid_mutex);
    1682           0 :         if (ret)
    1683             :                 goto error_undo;
    1684             : 
    1685             :         /*
    1686             :          * TODO: the superblock still includes this device in its num_devices
    1687             :          * counter although write_all_supers() is not locked out. This
    1688             :          * could give a filesystem state which requires a degraded mount.
    1689             :          */
    1690           0 :         ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device);
    1691           0 :         if (ret)
    1692             :                 goto error_undo;
    1693             : 
    1694           0 :         spin_lock(&root->fs_info->free_chunk_lock);
    1695           0 :         root->fs_info->free_chunk_space = device->total_bytes -
    1696           0 :                 device->bytes_used;
    1697           0 :         spin_unlock(&root->fs_info->free_chunk_lock);
    1698             : 
    1699           0 :         device->in_fs_metadata = 0;
    1700           0 :         btrfs_scrub_cancel_dev(root->fs_info, device);
    1701             : 
    1702             :         /*
    1703             :          * the device list mutex makes sure that we don't change
    1704             :          * the device list while someone else is writing out all
    1705             :          * the device supers. Whoever is writing all supers, should
    1706             :          * lock the device list mutex before getting the number of
    1707             :          * devices in the super block (super_copy). Conversely,
    1708             :          * whoever updates the number of devices in the super block
    1709             :          * (super_copy) should hold the device list mutex.
    1710             :          */
    1711             : 
    1712           0 :         cur_devices = device->fs_devices;
    1713           0 :         mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
    1714           0 :         list_del_rcu(&device->dev_list);
    1715             : 
    1716           0 :         device->fs_devices->num_devices--;
    1717           0 :         device->fs_devices->total_devices--;
    1718             : 
    1719           0 :         if (device->missing)
    1720           0 :                 device->fs_devices->missing_devices--;
    1721             : 
    1722           0 :         next_device = list_entry(root->fs_info->fs_devices->devices.next,
    1723             :                                  struct btrfs_device, dev_list);
    1724           0 :         if (device->bdev == root->fs_info->sb->s_bdev)
    1725           0 :                 root->fs_info->sb->s_bdev = next_device->bdev;
    1726           0 :         if (device->bdev == root->fs_info->fs_devices->latest_bdev)
    1727           0 :                 root->fs_info->fs_devices->latest_bdev = next_device->bdev;
    1728             : 
    1729           0 :         if (device->bdev) {
    1730           0 :                 device->fs_devices->open_devices--;
    1731             :                 /* remove sysfs entry */
    1732           0 :                 btrfs_kobj_rm_device(root->fs_info, device);
    1733             :         }
    1734             : 
    1735           0 :         call_rcu(&device->rcu, free_device);
    1736             : 
    1737           0 :         num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1;
    1738             :         btrfs_set_super_num_devices(root->fs_info->super_copy, num_devices);
    1739           0 :         mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
    1740             : 
    1741           0 :         if (cur_devices->open_devices == 0) {
    1742             :                 struct btrfs_fs_devices *fs_devices;
    1743           0 :                 fs_devices = root->fs_info->fs_devices;
    1744           0 :                 while (fs_devices) {
    1745           0 :                         if (fs_devices->seed == cur_devices) {
    1746           0 :                                 fs_devices->seed = cur_devices->seed;
    1747           0 :                                 break;
    1748             :                         }
    1749             :                         fs_devices = fs_devices->seed;
    1750             :                 }
    1751           0 :                 cur_devices->seed = NULL;
    1752             :                 lock_chunks(root);
    1753           0 :                 __btrfs_close_devices(cur_devices);
    1754             :                 unlock_chunks(root);
    1755           0 :                 free_fs_devices(cur_devices);
    1756             :         }
    1757             : 
    1758           0 :         root->fs_info->num_tolerated_disk_barrier_failures =
    1759           0 :                 btrfs_calc_num_tolerated_disk_barrier_failures(root->fs_info);
    1760             : 
    1761             :         /*
    1762             :          * at this point, the device is zero sized.  We want to
    1763             :          * remove it from the devices list and zero out the old super
    1764             :          */
    1765           0 :         if (clear_super && disk_super) {
    1766             :                 u64 bytenr;
    1767             :                 int i;
    1768             : 
    1769             :                 /* make sure this device isn't detected as part of
    1770             :                  * the FS anymore
    1771             :                  */
    1772           0 :                 memset(&disk_super->magic, 0, sizeof(disk_super->magic));
    1773           0 :                 set_buffer_dirty(bh);
    1774           0 :                 sync_dirty_buffer(bh);
    1775             : 
    1776             :                 /* clear the mirror copies of the super block on the disk
    1777             :                  * being removed; the 0th copy has been taken care of above
    1778             :                  * and the loop below takes care of the rest
    1779             :                  */
    1780           0 :                 for (i = 1; i < BTRFS_SUPER_MIRROR_MAX; i++) {
    1781             :                         bytenr = btrfs_sb_offset(i);
    1782           0 :                         if (bytenr + BTRFS_SUPER_INFO_SIZE >=
    1783           0 :                                         i_size_read(bdev->bd_inode))
    1784             :                                 break;
    1785             : 
    1786           0 :                         brelse(bh);
    1787           0 :                         bh = __bread(bdev, bytenr / 4096,
    1788             :                                         BTRFS_SUPER_INFO_SIZE);
    1789           0 :                         if (!bh)
    1790           0 :                                 continue;
    1791             : 
    1792           0 :                         disk_super = (struct btrfs_super_block *)bh->b_data;
    1793             : 
    1794           0 :                         if (btrfs_super_bytenr(disk_super) != bytenr ||
    1795             :                                 btrfs_super_magic(disk_super) != BTRFS_MAGIC) {
    1796           0 :                                 continue;
    1797             :                         }
    1798           0 :                         memset(&disk_super->magic, 0,
    1799             :                                                 sizeof(disk_super->magic));
    1800           0 :                         set_buffer_dirty(bh);
    1801           0 :                         sync_dirty_buffer(bh);
    1802             :                 }
    1803             :         }
    1804             : 
    1805             :         ret = 0;
    1806             : 
    1807           0 :         if (bdev) {
    1808             :                 /* Notify udev that device has changed */
    1809           0 :                 btrfs_kobject_uevent(bdev, KOBJ_CHANGE);
    1810             : 
    1811             :                 /* Update ctime/mtime for device path for libblkid */
    1812           0 :                 update_dev_time(device_path);
    1813             :         }
    1814             : 
    1815             : error_brelse:
    1816           0 :         brelse(bh);
    1817           0 :         if (bdev)
    1818           0 :                 blkdev_put(bdev, FMODE_READ | FMODE_EXCL);
    1819             : out:
    1820           0 :         mutex_unlock(&uuid_mutex);
    1821           0 :         return ret;
    1822             : error_undo:
    1823           0 :         if (device->writeable) {
    1824             :                 lock_chunks(root);
    1825           0 :                 list_add(&device->dev_alloc_list,
    1826           0 :                          &root->fs_info->fs_devices->alloc_list);
    1827             :                 unlock_chunks(root);
    1828           0 :                 root->fs_info->fs_devices->rw_devices++;
    1829             :         }
    1830             :         goto error_brelse;
    1831             : }
    1832             : 
    1833           7 : void btrfs_rm_dev_replace_srcdev(struct btrfs_fs_info *fs_info,
    1834             :                                  struct btrfs_device *srcdev)
    1835             : {
    1836          14 :         WARN_ON(!mutex_is_locked(&fs_info->fs_devices->device_list_mutex));
    1837             : 
    1838           7 :         list_del_rcu(&srcdev->dev_list);
    1839           7 :         list_del_rcu(&srcdev->dev_alloc_list);
    1840           7 :         fs_info->fs_devices->num_devices--;
    1841           7 :         if (srcdev->missing) {
    1842           0 :                 fs_info->fs_devices->missing_devices--;
    1843           0 :                 fs_info->fs_devices->rw_devices++;
    1844             :         }
    1845           7 :         if (srcdev->can_discard)
    1846           0 :                 fs_info->fs_devices->num_can_discard--;
    1847           7 :         if (srcdev->bdev) {
    1848           7 :                 fs_info->fs_devices->open_devices--;
    1849             : 
    1850             :                 /*
    1851             :                  * zero out the old super if it is not writable
    1852             :                  * (e.g. seed device)
    1853             :                  */
    1854           7 :                 if (srcdev->writeable)
    1855           7 :                         btrfs_scratch_superblock(srcdev);
    1856             :         }
    1857             : 
    1858           7 :         call_rcu(&srcdev->rcu, free_device);
    1859           7 : }
    1860             : 
    1861           1 : void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
    1862             :                                       struct btrfs_device *tgtdev)
    1863             : {
    1864             :         struct btrfs_device *next_device;
    1865             : 
    1866           1 :         WARN_ON(!tgtdev);
    1867           1 :         mutex_lock(&fs_info->fs_devices->device_list_mutex);
    1868           1 :         if (tgtdev->bdev) {
    1869           1 :                 btrfs_scratch_superblock(tgtdev);
    1870           1 :                 fs_info->fs_devices->open_devices--;
    1871             :         }
    1872           1 :         fs_info->fs_devices->num_devices--;
    1873           1 :         if (tgtdev->can_discard)
    1874           0 :                 fs_info->fs_devices->num_can_discard++;
    1875             : 
    1876           1 :         next_device = list_entry(fs_info->fs_devices->devices.next,
    1877             :                                  struct btrfs_device, dev_list);
    1878           1 :         if (tgtdev->bdev == fs_info->sb->s_bdev)
    1879           0 :                 fs_info->sb->s_bdev = next_device->bdev;
    1880           1 :         if (tgtdev->bdev == fs_info->fs_devices->latest_bdev)
    1881           0 :                 fs_info->fs_devices->latest_bdev = next_device->bdev;
    1882           1 :         list_del_rcu(&tgtdev->dev_list);
    1883             : 
    1884           1 :         call_rcu(&tgtdev->rcu, free_device);
    1885             : 
    1886           1 :         mutex_unlock(&fs_info->fs_devices->device_list_mutex);
    1887           1 : }
    1888             : 
    1889           8 : static int btrfs_find_device_by_path(struct btrfs_root *root, char *device_path,
    1890             :                                      struct btrfs_device **device)
    1891             : {
    1892             :         int ret = 0;
    1893             :         struct btrfs_super_block *disk_super;
    1894             :         u64 devid;
    1895             :         u8 *dev_uuid;
    1896             :         struct block_device *bdev;
    1897             :         struct buffer_head *bh;
    1898             : 
    1899           8 :         *device = NULL;
    1900           8 :         ret = btrfs_get_bdev_and_sb(device_path, FMODE_READ,
    1901           8 :                                     root->fs_info->bdev_holder, 0, &bdev, &bh);
    1902           8 :         if (ret)
    1903             :                 return ret;
    1904           8 :         disk_super = (struct btrfs_super_block *)bh->b_data;
    1905             :         devid = btrfs_stack_device_id(&disk_super->dev_item);
    1906           8 :         dev_uuid = disk_super->dev_item.uuid;
    1907           8 :         *device = btrfs_find_device(root->fs_info, devid, dev_uuid,
    1908           8 :                                     disk_super->fsid);
    1909             :         brelse(bh);
    1910           8 :         if (!*device)
    1911             :                 ret = -ENOENT;
    1912           8 :         blkdev_put(bdev, FMODE_READ);
    1913             :         return ret;
    1914             : }
    1915             : 
    1916           8 : int btrfs_find_device_missing_or_by_path(struct btrfs_root *root,
    1917             :                                          char *device_path,
    1918             :                                          struct btrfs_device **device)
    1919             : {
    1920           8 :         *device = NULL;
    1921           8 :         if (strcmp(device_path, "missing") == 0) {
    1922             :                 struct list_head *devices;
    1923             :                 struct btrfs_device *tmp;
    1924             : 
    1925           0 :                 devices = &root->fs_info->fs_devices->devices;
    1926             :                 /*
    1927             :                  * It is safe to read the devices since the volume_mutex
    1928             :                  * is held by the caller.
    1929             :                  */
    1930           0 :                 list_for_each_entry(tmp, devices, dev_list) {
    1931           0 :                         if (tmp->in_fs_metadata && !tmp->bdev) {
    1932           0 :                                 *device = tmp;
    1933           0 :                                 break;
    1934             :                         }
    1935             :                 }
    1936             : 
    1937           0 :                 if (!*device) {
    1938           0 :                         btrfs_err(root->fs_info, "no missing device found");
    1939           0 :                         return -ENOENT;
    1940             :                 }
    1941             : 
    1942             :                 return 0;
    1943             :         } else {
    1944           8 :                 return btrfs_find_device_by_path(root, device_path, device);
    1945             :         }
    1946             : }
    1947             : 
    1948             : /*
    1949             :  * does all the dirty work required for changing file system's UUID.
    1950             :  */
    1951           0 : static int btrfs_prepare_sprout(struct btrfs_root *root)
    1952             : {
    1953           0 :         struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
    1954             :         struct btrfs_fs_devices *old_devices;
    1955             :         struct btrfs_fs_devices *seed_devices;
    1956           0 :         struct btrfs_super_block *disk_super = root->fs_info->super_copy;
    1957             :         struct btrfs_device *device;
    1958             :         u64 super_flags;
    1959             : 
    1960           0 :         BUG_ON(!mutex_is_locked(&uuid_mutex));
    1961           0 :         if (!fs_devices->seeding)
    1962             :                 return -EINVAL;
    1963             : 
    1964           0 :         seed_devices = __alloc_fs_devices();
    1965           0 :         if (IS_ERR(seed_devices))
    1966           0 :                 return PTR_ERR(seed_devices);
    1967             : 
    1968           0 :         old_devices = clone_fs_devices(fs_devices);
    1969           0 :         if (IS_ERR(old_devices)) {
    1970           0 :                 kfree(seed_devices);
    1971           0 :                 return PTR_ERR(old_devices);
    1972             :         }
    1973             : 
    1974           0 :         list_add(&old_devices->list, &fs_uuids);
    1975             : 
    1976           0 :         memcpy(seed_devices, fs_devices, sizeof(*seed_devices));
    1977           0 :         seed_devices->opened = 1;
    1978           0 :         INIT_LIST_HEAD(&seed_devices->devices);
    1979           0 :         INIT_LIST_HEAD(&seed_devices->alloc_list);
    1980           0 :         mutex_init(&seed_devices->device_list_mutex);
    1981             : 
    1982           0 :         mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
    1983           0 :         list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices,
    1984             :                               synchronize_rcu);
    1985             : 
    1986           0 :         list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list);
    1987           0 :         list_for_each_entry(device, &seed_devices->devices, dev_list) {
    1988           0 :                 device->fs_devices = seed_devices;
    1989             :         }
    1990             : 
    1991           0 :         fs_devices->seeding = 0;
    1992           0 :         fs_devices->num_devices = 0;
    1993           0 :         fs_devices->open_devices = 0;
    1994           0 :         fs_devices->missing_devices = 0;
    1995           0 :         fs_devices->num_can_discard = 0;
    1996           0 :         fs_devices->rotating = 0;
    1997           0 :         fs_devices->seed = seed_devices;
    1998             : 
    1999           0 :         generate_random_uuid(fs_devices->fsid);
    2000           0 :         memcpy(root->fs_info->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
    2001           0 :         memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
    2002           0 :         mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
    2003             : 
    2004           0 :         super_flags = btrfs_super_flags(disk_super) &
    2005             :                       ~BTRFS_SUPER_FLAG_SEEDING;
    2006             :         btrfs_set_super_flags(disk_super, super_flags);
    2007             : 
    2008             :         return 0;
    2009             : }
    2010             : 
    2011             : /*
    2012             :  * strore the expected generation for seed devices in device items.
    2013             :  */
    2014           0 : static int btrfs_finish_sprout(struct btrfs_trans_handle *trans,
    2015             :                                struct btrfs_root *root)
    2016             : {
    2017             :         struct btrfs_path *path;
    2018           0 :         struct extent_buffer *leaf;
    2019             :         struct btrfs_dev_item *dev_item;
    2020             :         struct btrfs_device *device;
    2021             :         struct btrfs_key key;
    2022             :         u8 fs_uuid[BTRFS_UUID_SIZE];
    2023             :         u8 dev_uuid[BTRFS_UUID_SIZE];
    2024             :         u64 devid;
    2025             :         int ret;
    2026             : 
    2027           0 :         path = btrfs_alloc_path();
    2028           0 :         if (!path)
    2029             :                 return -ENOMEM;
    2030             : 
    2031           0 :         root = root->fs_info->chunk_root;
    2032           0 :         key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
    2033           0 :         key.offset = 0;
    2034           0 :         key.type = BTRFS_DEV_ITEM_KEY;
    2035             : 
    2036             :         while (1) {
    2037           0 :                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
    2038           0 :                 if (ret < 0)
    2039             :                         goto error;
    2040             : 
    2041           0 :                 leaf = path->nodes[0];
    2042             : next_slot:
    2043           0 :                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
    2044           0 :                         ret = btrfs_next_leaf(root, path);
    2045           0 :                         if (ret > 0)
    2046             :                                 break;
    2047           0 :                         if (ret < 0)
    2048             :                                 goto error;
    2049           0 :                         leaf = path->nodes[0];
    2050           0 :                         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
    2051           0 :                         btrfs_release_path(path);
    2052           0 :                         continue;
    2053             :                 }
    2054             : 
    2055           0 :                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
    2056           0 :                 if (key.objectid != BTRFS_DEV_ITEMS_OBJECTID ||
    2057           0 :                     key.type != BTRFS_DEV_ITEM_KEY)
    2058             :                         break;
    2059             : 
    2060           0 :                 dev_item = btrfs_item_ptr(leaf, path->slots[0],
    2061             :                                           struct btrfs_dev_item);
    2062             :                 devid = btrfs_device_id(leaf, dev_item);
    2063           0 :                 read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item),
    2064             :                                    BTRFS_UUID_SIZE);
    2065           0 :                 read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item),
    2066             :                                    BTRFS_UUID_SIZE);
    2067           0 :                 device = btrfs_find_device(root->fs_info, devid, dev_uuid,
    2068             :                                            fs_uuid);
    2069           0 :                 BUG_ON(!device); /* Logic error */
    2070             : 
    2071           0 :                 if (device->fs_devices->seeding) {
    2072           0 :                         btrfs_set_device_generation(leaf, dev_item,
    2073             :                                                     device->generation);
    2074           0 :                         btrfs_mark_buffer_dirty(leaf);
    2075             :                 }
    2076             : 
    2077           0 :                 path->slots[0]++;
    2078             :                 goto next_slot;
    2079             :         }
    2080             :         ret = 0;
    2081             : error:
    2082           0 :         btrfs_free_path(path);
    2083             :         return ret;
    2084             : }
    2085             : 
    2086           0 : int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
    2087             : {
    2088             :         struct request_queue *q;
    2089             :         struct btrfs_trans_handle *trans;
    2090             :         struct btrfs_device *device;
    2091           0 :         struct block_device *bdev;
    2092             :         struct list_head *devices;
    2093           0 :         struct super_block *sb = root->fs_info->sb;
    2094             :         struct rcu_string *name;
    2095             :         u64 total_bytes;
    2096             :         int seeding_dev = 0;
    2097             :         int ret = 0;
    2098             : 
    2099           0 :         if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding)
    2100             :                 return -EROFS;
    2101             : 
    2102           0 :         bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL,
    2103             :                                   root->fs_info->bdev_holder);
    2104           0 :         if (IS_ERR(bdev))
    2105           0 :                 return PTR_ERR(bdev);
    2106             : 
    2107           0 :         if (root->fs_info->fs_devices->seeding) {
    2108             :                 seeding_dev = 1;
    2109           0 :                 down_write(&sb->s_umount);
    2110           0 :                 mutex_lock(&uuid_mutex);
    2111             :         }
    2112             : 
    2113           0 :         filemap_write_and_wait(bdev->bd_inode->i_mapping);
    2114             : 
    2115           0 :         devices = &root->fs_info->fs_devices->devices;
    2116             : 
    2117           0 :         mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
    2118           0 :         list_for_each_entry(device, devices, dev_list) {
    2119           0 :                 if (device->bdev == bdev) {
    2120             :                         ret = -EEXIST;
    2121           0 :                         mutex_unlock(
    2122           0 :                                 &root->fs_info->fs_devices->device_list_mutex);
    2123           0 :                         goto error;
    2124             :                 }
    2125             :         }
    2126           0 :         mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
    2127             : 
    2128           0 :         device = btrfs_alloc_device(root->fs_info, NULL, NULL);
    2129           0 :         if (IS_ERR(device)) {
    2130             :                 /* we can safely leave the fs_devices entry around */
    2131           0 :                 ret = PTR_ERR(device);
    2132           0 :                 goto error;
    2133             :         }
    2134             : 
    2135           0 :         name = rcu_string_strdup(device_path, GFP_NOFS);
    2136           0 :         if (!name) {
    2137           0 :                 kfree(device);
    2138             :                 ret = -ENOMEM;
    2139           0 :                 goto error;
    2140             :         }
    2141           0 :         rcu_assign_pointer(device->name, name);
    2142             : 
    2143           0 :         trans = btrfs_start_transaction(root, 0);
    2144           0 :         if (IS_ERR(trans)) {
    2145           0 :                 rcu_string_free(device->name);
    2146           0 :                 kfree(device);
    2147           0 :                 ret = PTR_ERR(trans);
    2148           0 :                 goto error;
    2149             :         }
    2150             : 
    2151             :         lock_chunks(root);
    2152             : 
    2153             :         q = bdev_get_queue(bdev);
    2154           0 :         if (blk_queue_discard(q))
    2155           0 :                 device->can_discard = 1;
    2156           0 :         device->writeable = 1;
    2157           0 :         device->generation = trans->transid;
    2158           0 :         device->io_width = root->sectorsize;
    2159           0 :         device->io_align = root->sectorsize;
    2160           0 :         device->sector_size = root->sectorsize;
    2161           0 :         device->total_bytes = i_size_read(bdev->bd_inode);
    2162           0 :         device->disk_total_bytes = device->total_bytes;
    2163           0 :         device->dev_root = root->fs_info->dev_root;
    2164           0 :         device->bdev = bdev;
    2165           0 :         device->in_fs_metadata = 1;
    2166           0 :         device->is_tgtdev_for_dev_replace = 0;
    2167           0 :         device->mode = FMODE_EXCL;
    2168           0 :         device->dev_stats_valid = 1;
    2169           0 :         set_blocksize(device->bdev, 4096);
    2170             : 
    2171           0 :         if (seeding_dev) {
    2172           0 :                 sb->s_flags &= ~MS_RDONLY;
    2173           0 :                 ret = btrfs_prepare_sprout(root);
    2174           0 :                 BUG_ON(ret); /* -ENOMEM */
    2175             :         }
    2176             : 
    2177           0 :         device->fs_devices = root->fs_info->fs_devices;
    2178             : 
    2179           0 :         mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
    2180           0 :         list_add_rcu(&device->dev_list, &root->fs_info->fs_devices->devices);
    2181           0 :         list_add(&device->dev_alloc_list,
    2182           0 :                  &root->fs_info->fs_devices->alloc_list);
    2183           0 :         root->fs_info->fs_devices->num_devices++;
    2184           0 :         root->fs_info->fs_devices->open_devices++;
    2185           0 :         root->fs_info->fs_devices->rw_devices++;
    2186           0 :         root->fs_info->fs_devices->total_devices++;
    2187           0 :         if (device->can_discard)
    2188           0 :                 root->fs_info->fs_devices->num_can_discard++;
    2189           0 :         root->fs_info->fs_devices->total_rw_bytes += device->total_bytes;
    2190             : 
    2191           0 :         spin_lock(&root->fs_info->free_chunk_lock);
    2192           0 :         root->fs_info->free_chunk_space += device->total_bytes;
    2193           0 :         spin_unlock(&root->fs_info->free_chunk_lock);
    2194             : 
    2195           0 :         if (!blk_queue_nonrot(bdev_get_queue(bdev)))
    2196           0 :                 root->fs_info->fs_devices->rotating = 1;
    2197             : 
    2198           0 :         total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy);
    2199           0 :         btrfs_set_super_total_bytes(root->fs_info->super_copy,
    2200           0 :                                     total_bytes + device->total_bytes);
    2201             : 
    2202           0 :         total_bytes = btrfs_super_num_devices(root->fs_info->super_copy);
    2203           0 :         btrfs_set_super_num_devices(root->fs_info->super_copy,
    2204             :                                     total_bytes + 1);
    2205             : 
    2206             :         /* add sysfs device entry */
    2207           0 :         btrfs_kobj_add_device(root->fs_info, device);
    2208             : 
    2209           0 :         mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
    2210             : 
    2211           0 :         if (seeding_dev) {
    2212             :                 char fsid_buf[BTRFS_UUID_UNPARSED_SIZE];
    2213           0 :                 ret = init_first_rw_device(trans, root, device);
    2214           0 :                 if (ret) {
    2215           0 :                         btrfs_abort_transaction(trans, root, ret);
    2216           0 :                         goto error_trans;
    2217             :                 }
    2218           0 :                 ret = btrfs_finish_sprout(trans, root);
    2219           0 :                 if (ret) {
    2220           0 :                         btrfs_abort_transaction(trans, root, ret);
    2221           0 :                         goto error_trans;
    2222             :                 }
    2223             : 
    2224             :                 /* Sprouting changes the fsid of the mounted root,
    2225             :                  * so rename the sysfs entry to match the new fsid.
    2226             :                  */
    2227           0 :                 snprintf(fsid_buf, BTRFS_UUID_UNPARSED_SIZE, "%pU",
    2228           0 :                                                 root->fs_info->fsid);
    2229           0 :                 if (kobject_rename(&root->fs_info->super_kobj, fsid_buf))
    2230             :                         goto error_trans;
    2231             :         } else {
    2232           0 :                 ret = btrfs_add_device(trans, root, device);
    2233           0 :                 if (ret) {
    2234           0 :                         btrfs_abort_transaction(trans, root, ret);
    2235           0 :                         goto error_trans;
    2236             :                 }
    2237             :         }
    2238             : 
    2239             :         /*
    2240             :          * we've got more storage, clear any full flags on the space
    2241             :          * infos
    2242             :          */
    2243           0 :         btrfs_clear_space_info_full(root->fs_info);
    2244             : 
    2245             :         unlock_chunks(root);
    2246           0 :         root->fs_info->num_tolerated_disk_barrier_failures =
    2247           0 :                 btrfs_calc_num_tolerated_disk_barrier_failures(root->fs_info);
    2248           0 :         ret = btrfs_commit_transaction(trans, root);
    2249             : 
    2250           0 :         if (seeding_dev) {
    2251           0 :                 mutex_unlock(&uuid_mutex);
    2252           0 :                 up_write(&sb->s_umount);
    2253             : 
    2254           0 :                 if (ret) /* transaction commit */
    2255             :                         return ret;
    2256             : 
    2257           0 :                 ret = btrfs_relocate_sys_chunks(root);
    2258           0 :                 if (ret < 0)
    2259           0 :                         btrfs_error(root->fs_info, ret,
    2260             :                                     "Failed to relocate sys chunks after "
    2261             :                                     "device initialization. This can be fixed "
    2262             :                                     "using the \"btrfs balance\" command.");
    2263           0 :                 trans = btrfs_attach_transaction(root);
    2264           0 :                 if (IS_ERR(trans)) {
    2265           0 :                         if (PTR_ERR(trans) == -ENOENT)
    2266             :                                 return 0;
    2267           0 :                         return PTR_ERR(trans);
    2268             :                 }
    2269           0 :                 ret = btrfs_commit_transaction(trans, root);
    2270             :         }
    2271             : 
    2272             :         /* Update ctime/mtime for libblkid */
    2273           0 :         update_dev_time(device_path);
    2274           0 :         return ret;
    2275             : 
    2276             : error_trans:
    2277             :         unlock_chunks(root);
    2278           0 :         btrfs_end_transaction(trans, root);
    2279           0 :         rcu_string_free(device->name);
    2280           0 :         btrfs_kobj_rm_device(root->fs_info, device);
    2281           0 :         kfree(device);
    2282             : error:
    2283           0 :         blkdev_put(bdev, FMODE_EXCL);
    2284           0 :         if (seeding_dev) {
    2285           0 :                 mutex_unlock(&uuid_mutex);
    2286           0 :                 up_write(&sb->s_umount);
    2287             :         }
    2288           0 :         return ret;
    2289             : }
    2290             : 
                       /*
                        * Open the block device at @device_path and set it up as the target
                        * device of a device-replace operation.
                        *
                        * The device is opened with FMODE_WRITE | FMODE_EXCL, rejected with
                        * -EEXIST if its bdev is already on fs_info->fs_devices->devices, and
                        * otherwise wrapped in a newly allocated btrfs_device that uses devid
                        * BTRFS_DEV_REPLACE_DEVID and has is_tgtdev_for_dev_replace set.  The
                        * new device is linked into the devices list and counted in
                        * num_devices/open_devices (and num_can_discard when applicable); this
                        * function does not put it on alloc_list and does not touch
                        * rw_devices or total_rw_bytes.
                        *
                        * Returns 0 with *device_out pointing at the new device.  On failure
                        * *device_out is NULL and the return value is -EINVAL (fs_devices is
                        * seeding), the blkdev_get_by_path() error, -EEXIST, the
                        * btrfs_alloc_device() error, or -ENOMEM (name duplication failed).
                        */
    2291           8 : int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path,
    2292             :                                   struct btrfs_device **device_out)
    2293             : {
    2294             :         struct request_queue *q;
    2295             :         struct btrfs_device *device;
    2296           8 :         struct block_device *bdev;
    2297           8 :         struct btrfs_fs_info *fs_info = root->fs_info;
    2298             :         struct list_head *devices;
    2299             :         struct rcu_string *name;
    2300           8 :         u64 devid = BTRFS_DEV_REPLACE_DEVID;
    2301             :         int ret = 0;
    2302             : 
    2303           8 :         *device_out = NULL;
    2304           8 :         if (fs_info->fs_devices->seeding)
    2305             :                 return -EINVAL;
    2306             : 
    2307           8 :         bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL,
    2308             :                                   fs_info->bdev_holder);
    2309           8 :         if (IS_ERR(bdev))
    2310           0 :                 return PTR_ERR(bdev);
    2311             : 
                               /* NOTE(review): the result of filemap_write_and_wait() is ignored. */
    2312          16 :         filemap_write_and_wait(bdev->bd_inode->i_mapping);
    2313             : 
                               /*
                                * Refuse a device that is already part of this filesystem.
                                * NOTE(review): this walk happens before device_list_mutex is
                                * taken further down - confirm callers serialize list changes.
                                */
    2314           8 :         devices = &fs_info->fs_devices->devices;
    2315          19 :         list_for_each_entry(device, devices, dev_list) {
    2316          11 :                 if (device->bdev == bdev) {
    2317             :                         ret = -EEXIST;
    2318             :                         goto error;
    2319             :                 }
    2320             :         }
    2321             : 
    2322           8 :         device = btrfs_alloc_device(NULL, &devid, NULL);
    2323           8 :         if (IS_ERR(device)) {
    2324           0 :                 ret = PTR_ERR(device);
    2325           0 :                 goto error;
    2326             :         }
    2327             : 
    2328           8 :         name = rcu_string_strdup(device_path, GFP_NOFS);
    2329           8 :         if (!name) {
    2330           0 :                 kfree(device);
    2331             :                 ret = -ENOMEM;
    2332           0 :                 goto error;
    2333             :         }
    2334           8 :         rcu_assign_pointer(device->name, name);
    2335             : 
    2336             :         q = bdev_get_queue(bdev);
    2337           8 :         if (blk_queue_discard(q))
    2338           0 :                 device->can_discard = 1;
                               /* Fill in the device and publish it on the list under the mutex. */
    2339           8 :         mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
    2340           8 :         device->writeable = 1;
    2341           8 :         device->generation = 0;
    2342           8 :         device->io_width = root->sectorsize;
    2343           8 :         device->io_align = root->sectorsize;
    2344           8 :         device->sector_size = root->sectorsize;
    2345          16 :         device->total_bytes = i_size_read(bdev->bd_inode);
    2346           8 :         device->disk_total_bytes = device->total_bytes;
    2347           8 :         device->dev_root = fs_info->dev_root;
    2348           8 :         device->bdev = bdev;
    2349           8 :         device->in_fs_metadata = 1;
    2350           8 :         device->is_tgtdev_for_dev_replace = 1;
    2351           8 :         device->mode = FMODE_EXCL;
    2352           8 :         device->dev_stats_valid = 1;
    2353           8 :         set_blocksize(device->bdev, 4096);
    2354           8 :         device->fs_devices = fs_info->fs_devices;
    2355           8 :         list_add(&device->dev_list, &fs_info->fs_devices->devices);
    2356           8 :         fs_info->fs_devices->num_devices++;
    2357           8 :         fs_info->fs_devices->open_devices++;
    2358           8 :         if (device->can_discard)
    2359           0 :                 fs_info->fs_devices->num_can_discard++;
    2360           8 :         mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
    2361             : 
    2362           8 :         *device_out = device;
    2363           8 :         return ret;
    2364             : 
    2365             : error:
                               /* Every failure after the open lands here to drop the bdev. */
    2366           0 :         blkdev_put(bdev, FMODE_EXCL);
    2367           0 :         return ret;
    2368             : }
    2369             : 
                       /*
                        * Re-initialise in-memory fields of an existing replace target device
                        * @tgtdev: io_width, io_align and sector_size are all set to
                        * fs_info->dev_root->sectorsize, dev_root is pointed at
                        * fs_info->dev_root and in_fs_metadata is set.
                        *
                        * Emits a WARN_ON if fs_devices->rw_devices is 0.  No list or counter
                        * updates are done here.
                        */
    2370           0 : void btrfs_init_dev_replace_tgtdev_for_resume(struct btrfs_fs_info *fs_info,
    2371             :                                               struct btrfs_device *tgtdev)
    2372             : {
    2373           0 :         WARN_ON(fs_info->fs_devices->rw_devices == 0);
    2374           0 :         tgtdev->io_width = fs_info->dev_root->sectorsize;
    2375           0 :         tgtdev->io_align = fs_info->dev_root->sectorsize;
    2376           0 :         tgtdev->sector_size = fs_info->dev_root->sectorsize;
    2377           0 :         tgtdev->dev_root = fs_info->dev_root;
    2378           0 :         tgtdev->in_fs_metadata = 1;
    2379           0 : }
    2380             : 
                       /*
                        * Write the in-memory state of @device back to its dev item in the
                        * chunk tree.
                        *
                        * The item keyed (BTRFS_DEV_ITEMS_OBJECTID, BTRFS_DEV_ITEM_KEY, devid)
                        * is looked up for modification and its id, type, io_align, io_width,
                        * sector_size, total_bytes and bytes_used fields are overwritten; the
                        * leaf is then marked dirty.
                        *
                        * Returns 0 on success, -ENOMEM if no path could be allocated, -ENOENT
                        * if the dev item does not exist, or the negative error returned by
                        * btrfs_search_slot().
                        */
    2381         249 : static noinline int btrfs_update_device(struct btrfs_trans_handle *trans,
    2382             :                                         struct btrfs_device *device)
    2383             : {
    2384             :         int ret;
    2385             :         struct btrfs_path *path;
    2386             :         struct btrfs_root *root;
    2387             :         struct btrfs_dev_item *dev_item;
    2388             :         struct extent_buffer *leaf;
    2389             :         struct btrfs_key key;
    2390             : 
    2391         249 :         root = device->dev_root->fs_info->chunk_root;
    2392             : 
    2393         249 :         path = btrfs_alloc_path();
    2394         249 :         if (!path)
    2395             :                 return -ENOMEM;
    2396             : 
    2397         249 :         key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
    2398         249 :         key.type = BTRFS_DEV_ITEM_KEY;
    2399         249 :         key.offset = device->devid;
    2400             : 
    2401         249 :         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
    2402         249 :         if (ret < 0)
    2403             :                 goto out;
    2404             : 
                               /* A positive return means the exact key was not found. */
    2405         249 :         if (ret > 0) {
    2406             :                 ret = -ENOENT;
    2407             :                 goto out;
    2408             :         }
    2409             : 
    2410         249 :         leaf = path->nodes[0];
    2411         498 :         dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
    2412             : 
    2413         249 :         btrfs_set_device_id(leaf, dev_item, device->devid);
    2414         249 :         btrfs_set_device_type(leaf, dev_item, device->type);
    2415         249 :         btrfs_set_device_io_align(leaf, dev_item, device->io_align);
    2416         249 :         btrfs_set_device_io_width(leaf, dev_item, device->io_width);
    2417         249 :         btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
                               /* The on-disk total is taken from disk_total_bytes, not total_bytes. */
    2418         249 :         btrfs_set_device_total_bytes(leaf, dev_item, device->disk_total_bytes);
    2419         249 :         btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
    2420         249 :         btrfs_mark_buffer_dirty(leaf);
    2421             : 
    2422             : out:
    2423         249 :         btrfs_free_path(path);
    2424         249 :         return ret;
    2425             : }
    2426             : 
                       /*
                        * Grow @device to @new_size bytes.
                        *
                        * The size increase is added to total_bytes in the super block copy
                        * and to fs_devices->total_rw_bytes, the device's total_bytes and
                        * disk_total_bytes are both set to @new_size, the space-info "full"
                        * flags are cleared, and the dev item is rewritten through
                        * btrfs_update_device().
                        *
                        * Returns -EACCES if the device is not writeable, -EINVAL if @new_size
                        * is not larger than the current total_bytes or the device is a
                        * replace target, otherwise the result of btrfs_update_device().
                        * No locking is done here; btrfs_grow_device() wraps this call in
                        * lock_chunks()/unlock_chunks().
                        */
    2427           0 : static int __btrfs_grow_device(struct btrfs_trans_handle *trans,
    2428             :                       struct btrfs_device *device, u64 new_size)
    2429             : {
    2430           0 :         struct btrfs_super_block *super_copy =
    2431           0 :                 device->dev_root->fs_info->super_copy;
    2432             :         u64 old_total = btrfs_super_total_bytes(super_copy);
                               /* Computed up front; only used once the size check below has passed. */
    2433           0 :         u64 diff = new_size - device->total_bytes;
    2434             : 
    2435           0 :         if (!device->writeable)
    2436             :                 return -EACCES;
    2437           0 :         if (new_size <= device->total_bytes ||
    2438           0 :             device->is_tgtdev_for_dev_replace)
    2439             :                 return -EINVAL;
    2440             : 
    2441           0 :         btrfs_set_super_total_bytes(super_copy, old_total + diff);
    2442           0 :         device->fs_devices->total_rw_bytes += diff;
    2443             : 
    2444           0 :         device->total_bytes = new_size;
    2445           0 :         device->disk_total_bytes = new_size;
    2446           0 :         btrfs_clear_space_info_full(device->dev_root->fs_info);
    2447             : 
    2448           0 :         return btrfs_update_device(trans, device);
    2449             : }
    2450             : 
                       /*
                        * Locked entry point for growing a device: calls __btrfs_grow_device()
                        * between lock_chunks() and unlock_chunks() on device->dev_root and
                        * returns its result unchanged.
                        */
    2451           0 : int btrfs_grow_device(struct btrfs_trans_handle *trans,
    2452             :                       struct btrfs_device *device, u64 new_size)
    2453             : {
    2454             :         int ret;
    2455           0 :         lock_chunks(device->dev_root);
    2456           0 :         ret = __btrfs_grow_device(trans, device, new_size);
    2457           0 :         unlock_chunks(device->dev_root);
    2458           0 :         return ret;
    2459             : }
    2460             : 
                       /*
                        * Delete the chunk item keyed (@chunk_objectid, BTRFS_CHUNK_ITEM_KEY,
                        * @chunk_offset) from the chunk tree.
                        *
                        * @root is only used to reach fs_info->chunk_root, which is the tree
                        * actually searched.  @chunk_tree is not used by this function.
                        *
                        * Returns 0 on success, -ENOMEM if no path could be allocated, -ENOENT
                        * if the item is missing (also reported through btrfs_error()), or the
                        * negative error from btrfs_search_slot()/btrfs_del_item().
                        */
    2461          72 : static int btrfs_free_chunk(struct btrfs_trans_handle *trans,
    2462             :                             struct btrfs_root *root,
    2463             :                             u64 chunk_tree, u64 chunk_objectid,
    2464             :                             u64 chunk_offset)
    2465             : {
    2466             :         int ret;
    2467             :         struct btrfs_path *path;
    2468             :         struct btrfs_key key;
    2469             : 
    2470          72 :         root = root->fs_info->chunk_root;
    2471          72 :         path = btrfs_alloc_path();
    2472          72 :         if (!path)
    2473             :                 return -ENOMEM;
    2474             : 
    2475          72 :         key.objectid = chunk_objectid;
    2476          72 :         key.offset = chunk_offset;
    2477          72 :         key.type = BTRFS_CHUNK_ITEM_KEY;
    2478             : 
    2479          72 :         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
    2480          72 :         if (ret < 0)
    2481             :                 goto out;
    2482          72 :         else if (ret > 0) { /* Logic error or corruption */
    2483           0 :                 btrfs_error(root->fs_info, -ENOENT,
    2484             :                             "Failed lookup while freeing chunk.");
    2485             :                 ret = -ENOENT;
    2486             :                 goto out;
    2487             :         }
    2488             : 
    2489             :         ret = btrfs_del_item(trans, root, path);
    2490          72 :         if (ret < 0)
    2491           0 :                 btrfs_error(root->fs_info, ret,
    2492             :                             "Failed to delete chunk item.");
    2493             : out:
    2494          72 :         btrfs_free_path(path);
    2495             :         return ret;
    2496             : }
    2497             : 
                       /*
                        * Remove the entry for (@chunk_objectid, @chunk_offset) from the
                        * sys_chunk_array held in the super block copy.
                        *
                        * The array is walked as a packed sequence of entries, each a
                        * btrfs_disk_key immediately followed by a btrfs_chunk whose size is
                        * btrfs_chunk_item_size(num_stripes).  A matching entry is removed by
                        * moving the tail of the array down over it and shrinking the
                        * recorded sys_array_size.
                        *
                        * Returns 0 (including when no entry matched), or -EIO if an entry
                        * whose key type is not BTRFS_CHUNK_ITEM_KEY is encountered.
                        */
    2498          25 : static int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64
    2499             :                         chunk_offset)
    2500             : {
    2501          25 :         struct btrfs_super_block *super_copy = root->fs_info->super_copy;
    2502             :         struct btrfs_disk_key *disk_key;
    2503             :         struct btrfs_chunk *chunk;
    2504             :         u8 *ptr;
    2505             :         int ret = 0;
    2506             :         u32 num_stripes;
    2507             :         u32 array_size;
    2508             :         u32 len = 0;
    2509             :         u32 cur;
    2510             :         struct btrfs_key key;
    2511             : 
    2512             :         array_size = btrfs_super_sys_array_size(super_copy);
    2513             : 
    2514          25 :         ptr = super_copy->sys_chunk_array;
    2515             :         cur = 0;
    2516             : 
    2517          78 :         while (cur < array_size) {
    2518             :                 disk_key = (struct btrfs_disk_key *)ptr;
    2519             :                 btrfs_disk_key_to_cpu(&key, disk_key);
    2520             : 
                                       /* len accumulates the size of this entry: key + chunk. */
    2521             :                 len = sizeof(*disk_key);
    2522             : 
    2523          53 :                 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
    2524             :                         chunk = (struct btrfs_chunk *)(ptr + len);
    2525             :                         num_stripes = btrfs_stack_chunk_num_stripes(chunk);
    2526         106 :                         len += btrfs_chunk_item_size(num_stripes);
    2527             :                 } else {
    2528             :                         ret = -EIO;
    2529             :                         break;
    2530             :                 }
    2531          53 :                 if (key.objectid == chunk_objectid &&
    2532             :                     key.offset == chunk_offset) {
                                               /*
                                                * Close the gap.  ptr/cur are left alone because
                                                * the next entry now starts at ptr.
                                                */
    2533          25 :                         memmove(ptr, ptr + len, array_size - (cur + len));
    2534          25 :                         array_size -= len;
    2535             :                         btrfs_set_super_sys_array_size(super_copy, array_size);
    2536             :                 } else {
    2537          28 :                         ptr += len;
    2538          28 :                         cur += len;
    2539             :                 }
    2540             :         }
    2541          25 :         return ret;
    2542             : }
    2543             : 
                       /*
                        * Relocate everything out of the chunk at @chunk_offset and then remove
                        * the chunk itself: its device extents, its chunk tree item, its
                        * sys_chunk_array entry (BTRFS_BLOCK_GROUP_SYSTEM chunks only), its
                        * block group and its extent mapping.
                        *
                        * @root is only used to reach fs_info; the chunk root is used from
                        * there on.
                        *
                        * Returns 0 on success, -ENOSPC if btrfs_can_relocate() returns
                        * non-zero, or the error from btrfs_relocate_block_group() or
                        * btrfs_start_transaction().  Once the transaction has been started,
                        * every failure is a BUG_ON().
                        */
    2544         313 : static int btrfs_relocate_chunk(struct btrfs_root *root,
    2545             :                          u64 chunk_tree, u64 chunk_objectid,
    2546             :                          u64 chunk_offset)
    2547             : {
    2548             :         struct extent_map_tree *em_tree;
    2549             :         struct btrfs_root *extent_root;
    2550             :         struct btrfs_trans_handle *trans;
    2551             :         struct extent_map *em;
    2552             :         struct map_lookup *map;
    2553             :         int ret;
    2554             :         int i;
    2555             : 
    2556          72 :         root = root->fs_info->chunk_root;
    2557          72 :         extent_root = root->fs_info->extent_root;
    2558          72 :         em_tree = &root->fs_info->mapping_tree.map_tree;
    2559             : 
    2560          72 :         ret = btrfs_can_relocate(extent_root, chunk_offset);
    2561          72 :         if (ret)
    2562             :                 return -ENOSPC;
    2563             : 
    2564             :         /* step one, relocate all the extents inside this chunk */
    2565          72 :         ret = btrfs_relocate_block_group(extent_root, chunk_offset);
    2566          72 :         if (ret)
    2567             :                 return ret;
    2568             : 
    2569          72 :         trans = btrfs_start_transaction(root, 0);
    2570          72 :         if (IS_ERR(trans)) {
    2571           0 :                 ret = PTR_ERR(trans);
    2572           0 :                 btrfs_std_error(root->fs_info, ret);
    2573             :                 return ret;
    2574             :         }
    2575             : 
    2576             :         lock_chunks(root);
    2577             : 
    2578             :         /*
    2579             :          * step two, delete the device extents and the
    2580             :          * chunk tree entries
    2581             :          */
    2582          72 :         read_lock(&em_tree->lock);
    2583          72 :         em = lookup_extent_mapping(em_tree, chunk_offset, 1);
    2584             :         read_unlock(&em_tree->lock);
    2585             : 
    2586          72 :         BUG_ON(!em || em->start > chunk_offset ||
    2587             :                em->start + em->len < chunk_offset);
                               /* The chunk's map_lookup is carried in the extent map's bdev field. */
    2588          72 :         map = (struct map_lookup *)em->bdev;
    2589             : 
                               /*
                                * NOTE(review): stripes[i].dev is handed to btrfs_free_dev_extent()
                                * before the NULL check on it below - confirm whether dev can
                                * actually be NULL here.
                                */
    2590         116 :         for (i = 0; i < map->num_stripes; i++) {
    2591         116 :                 ret = btrfs_free_dev_extent(trans, map->stripes[i].dev,
    2592             :                                             map->stripes[i].physical);
    2593         116 :                 BUG_ON(ret);
    2594             : 
    2595         116 :                 if (map->stripes[i].dev) {
    2596         116 :                         ret = btrfs_update_device(trans, map->stripes[i].dev);
    2597         116 :                         BUG_ON(ret);
    2598             :                 }
    2599             :         }
    2600          72 :         ret = btrfs_free_chunk(trans, root, chunk_tree, chunk_objectid,
    2601             :                                chunk_offset);
    2602             : 
    2603          72 :         BUG_ON(ret);
    2604             : 
    2605          72 :         trace_btrfs_chunk_free(root, map, chunk_offset, em->len);
    2606             : 
                               /* System chunks are also recorded in the super block's array. */
    2607          72 :         if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
    2608          25 :                 ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset);
    2609          25 :                 BUG_ON(ret);
    2610             :         }
    2611             : 
    2612          72 :         ret = btrfs_remove_block_group(trans, extent_root, chunk_offset);
    2613          72 :         BUG_ON(ret);
    2614             : 
    2615          72 :         write_lock(&em_tree->lock);
    2616          72 :         remove_extent_mapping(em_tree, em);
    2617             :         write_unlock(&em_tree->lock);
    2618             : 
    2619             :         /* once for the tree */
    2620          72 :         free_extent_map(em);
    2621             :         /* once for us */
    2622          72 :         free_extent_map(em);
    2623             : 
    2624             :         unlock_chunks(root);
    2625          72 :         btrfs_end_transaction(trans, root);
    2626             :         return 0;
    2627             : }
    2628             : 
                       /*
                        * Walk the chunk items of the chunk tree from the highest offset
                        * downwards and relocate every chunk that has
                        * BTRFS_BLOCK_GROUP_SYSTEM set, using btrfs_relocate_chunk().
                        *
                        * Relocations that fail with -ENOSPC are counted; if there were any,
                        * the whole walk is repeated once.  If chunks still fail on the second
                        * pass a warning is raised and -ENOSPC is returned.  Any other
                        * relocation error is a BUG().
                        *
                        * Returns 0 on success, -ENOMEM if no path could be allocated,
                        * -ENOSPC as described above, or the negative error from
                        * btrfs_search_slot()/btrfs_previous_item().
                        */
    2629           0 : static int btrfs_relocate_sys_chunks(struct btrfs_root *root)
    2630             : {
    2631           0 :         struct btrfs_root *chunk_root = root->fs_info->chunk_root;
    2632             :         struct btrfs_path *path;
    2633             :         struct extent_buffer *leaf;
    2634             :         struct btrfs_chunk *chunk;
    2635             :         struct btrfs_key key;
    2636             :         struct btrfs_key found_key;
    2637             :         u64 chunk_tree = chunk_root->root_key.objectid;
    2638             :         u64 chunk_type;
    2639             :         bool retried = false;
    2640             :         int failed = 0;
    2641             :         int ret;
    2642             : 
    2643           0 :         path = btrfs_alloc_path();
    2644           0 :         if (!path)
    2645             :                 return -ENOMEM;
    2646             : 
    2647             : again:
                               /* Start past the last possible chunk item and step backwards. */
    2648           0 :         key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
    2649           0 :         key.offset = (u64)-1;
    2650           0 :         key.type = BTRFS_CHUNK_ITEM_KEY;
    2651             : 
    2652             :         while (1) {
    2653           0 :                 ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
    2654           0 :                 if (ret < 0)
    2655             :                         goto error;
    2656           0 :                 BUG_ON(ret == 0); /* Corruption */
    2657             : 
    2658           0 :                 ret = btrfs_previous_item(chunk_root, path, key.objectid,
    2659           0 :                                           key.type);
    2660           0 :                 if (ret < 0)
    2661             :                         goto error;
                                       /* No earlier chunk item: the walk is complete. */
    2662           0 :                 if (ret > 0)
    2663             :                         break;
    2664             : 
    2665           0 :                 leaf = path->nodes[0];
    2666           0 :                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
    2667             : 
    2668           0 :                 chunk = btrfs_item_ptr(leaf, path->slots[0],
    2669             :                                        struct btrfs_chunk);
    2670             :                 chunk_type = btrfs_chunk_type(leaf, chunk);
                                       /* The path is released before the relocation call below. */
    2671           0 :                 btrfs_release_path(path);
    2672             : 
    2673           0 :                 if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) {
    2674           0 :                         ret = btrfs_relocate_chunk(chunk_root, chunk_tree,
    2675             :                                                    found_key.objectid,
    2676             :                                                    found_key.offset);
    2677           0 :                         if (ret == -ENOSPC)
    2678           0 :                                 failed++;
    2679           0 :                         else if (ret)
    2680           0 :                                 BUG();
    2681             :                 }
    2682             : 
    2683           0 :                 if (found_key.offset == 0)
    2684             :                         break;
                                       /* Continue with the item just below the one handled. */
    2685           0 :                 key.offset = found_key.offset - 1;
    2686             :         }
    2687             :         ret = 0;
    2688           0 :         if (failed && !retried) {
    2689             :                 failed = 0;
    2690             :                 retried = true;
    2691             :                 goto again;
    2692           0 :         } else if (WARN_ON(failed && retried)) {
    2693             :                 ret = -ENOSPC;
    2694             :         }
    2695             : error:
    2696           0 :         btrfs_free_path(path);
    2697             :         return ret;
    2698             : }
    2699             : 
                       /*
                        * Insert the balance item, keyed (BTRFS_BALANCE_OBJECTID,
                        * BTRFS_BALANCE_ITEM_KEY, 0), into @root and fill it with the data,
                        * meta and sys arguments and the flags taken from @bctl.
                        *
                        * The work is done in its own transaction, which is committed before
                        * returning even when the insertion failed.
                        *
                        * Returns 0 on success, -ENOMEM if no path could be allocated, the
                        * btrfs_start_transaction() error, the btrfs_insert_empty_item()
                        * error, or - only if the insertion itself succeeded - the error from
                        * btrfs_commit_transaction().
                        */
    2700          22 : static int insert_balance_item(struct btrfs_root *root,
    2701             :                                struct btrfs_balance_control *bctl)
    2702             : {
    2703             :         struct btrfs_trans_handle *trans;
    2704             :         struct btrfs_balance_item *item;
    2705             :         struct btrfs_disk_balance_args disk_bargs;
    2706             :         struct btrfs_path *path;
    2707             :         struct extent_buffer *leaf;
    2708             :         struct btrfs_key key;
    2709             :         int ret, err;
    2710             : 
    2711          22 :         path = btrfs_alloc_path();
    2712          22 :         if (!path)
    2713             :                 return -ENOMEM;
    2714             : 
    2715          22 :         trans = btrfs_start_transaction(root, 0);
    2716          22 :         if (IS_ERR(trans)) {
    2717           0 :                 btrfs_free_path(path);
    2718           0 :                 return PTR_ERR(trans);
    2719             :         }
    2720             : 
    2721          22 :         key.objectid = BTRFS_BALANCE_OBJECTID;
    2722          22 :         key.type = BTRFS_BALANCE_ITEM_KEY;
    2723          22 :         key.offset = 0;
    2724             : 
    2725             :         ret = btrfs_insert_empty_item(trans, root, path, &key,
    2726             :                                       sizeof(*item));
    2727          22 :         if (ret)
    2728             :                 goto out;
    2729             : 
    2730          22 :         leaf = path->nodes[0];
    2731          44 :         item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);
    2732             : 
                               /* Zero the whole item before the individual fields are set. */
    2733          22 :         memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item));
    2734             : 
                               /* disk_bargs is reused as scratch space for each argument set. */
    2735          22 :         btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->data);
    2736             :         btrfs_set_balance_data(leaf, item, &disk_bargs);
    2737          22 :         btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->meta);
    2738             :         btrfs_set_balance_meta(leaf, item, &disk_bargs);
    2739          22 :         btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->sys);
    2740             :         btrfs_set_balance_sys(leaf, item, &disk_bargs);
    2741             : 
    2742          22 :         btrfs_set_balance_flags(leaf, item, bctl->flags);
    2743             : 
    2744          22 :         btrfs_mark_buffer_dirty(leaf);
    2745             : out:
    2746          22 :         btrfs_free_path(path);
                               /* Keep the first error: a commit failure does not mask ret. */
    2747          22 :         err = btrfs_commit_transaction(trans, root);
    2748          22 :         if (err && !ret)
    2749             :                 ret = err;
    2750          22 :         return ret;
    2751             : }
    2752             : 
                       /*
                        * Delete the balance item, keyed (BTRFS_BALANCE_OBJECTID,
                        * BTRFS_BALANCE_ITEM_KEY, 0), from @root.
                        *
                        * The work is done in its own transaction, which is committed before
                        * returning even when the lookup or deletion failed.
                        *
                        * Returns 0 on success, -ENOMEM if no path could be allocated, the
                        * btrfs_start_transaction() error, -ENOENT if the item does not
                        * exist, the btrfs_search_slot()/btrfs_del_item() error, or - only if
                        * everything before it succeeded - the error from
                        * btrfs_commit_transaction().
                        */
    2753          22 : static int del_balance_item(struct btrfs_root *root)
    2754             : {
    2755             :         struct btrfs_trans_handle *trans;
    2756             :         struct btrfs_path *path;
    2757             :         struct btrfs_key key;
    2758             :         int ret, err;
    2759             : 
    2760          22 :         path = btrfs_alloc_path();
    2761          22 :         if (!path)
    2762             :                 return -ENOMEM;
    2763             : 
    2764          22 :         trans = btrfs_start_transaction(root, 0);
    2765          22 :         if (IS_ERR(trans)) {
    2766           0 :                 btrfs_free_path(path);
    2767           0 :                 return PTR_ERR(trans);
    2768             :         }
    2769             : 
    2770          22 :         key.objectid = BTRFS_BALANCE_OBJECTID;
    2771          22 :         key.type = BTRFS_BALANCE_ITEM_KEY;
    2772          22 :         key.offset = 0;
    2773             : 
    2774          22 :         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
    2775          22 :         if (ret < 0)
    2776             :                 goto out;
                               /* A positive return means the exact key was not found. */
    2777          22 :         if (ret > 0) {
    2778             :                 ret = -ENOENT;
    2779             :                 goto out;
    2780             :         }
    2781             : 
    2782             :         ret = btrfs_del_item(trans, root, path);
    2783             : out:
    2784          22 :         btrfs_free_path(path);
                               /* Keep the first error: a commit failure does not mask ret. */
    2785          22 :         err = btrfs_commit_transaction(trans, root);
    2786          22 :         if (err && !ret)
    2787             :                 ret = err;
    2788          22 :         return ret;
    2789             : }
    2790             : 
    2791             : /*
    2792             :  * This is a heuristic used to reduce the number of chunks balanced on
    2793             :  * resume after balance was interrupted.
    2794             :  */
    2795           0 : static void update_balance_args(struct btrfs_balance_control *bctl)
    2796             : {
    2797             :         /*
    2798             :          * Turn on soft mode for chunk types that were being converted.
    2799             :          */
    2800           0 :         if (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT)
    2801           0 :                 bctl->data.flags |= BTRFS_BALANCE_ARGS_SOFT;
    2802           0 :         if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT)
    2803           0 :                 bctl->sys.flags |= BTRFS_BALANCE_ARGS_SOFT;
    2804           0 :         if (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT)
    2805           0 :                 bctl->meta.flags |= BTRFS_BALANCE_ARGS_SOFT;
    2806             : 
    2807             :         /*
    2808             :          * Turn on usage filter if is not already used.  The idea is
    2809             :          * that chunks that we have already balanced should be
    2810             :          * reasonably full.  Don't do it for chunks that are being
    2811             :          * converted - that will keep us from relocating unconverted
    2812             :          * (albeit full) chunks.
    2813             :          */
    2814           0 :         if (!(bctl->data.flags & BTRFS_BALANCE_ARGS_USAGE) &&
    2815             :             !(bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
    2816           0 :                 bctl->data.flags |= BTRFS_BALANCE_ARGS_USAGE;
    2817           0 :                 bctl->data.usage = 90;
    2818             :         }
    2819           0 :         if (!(bctl->sys.flags & BTRFS_BALANCE_ARGS_USAGE) &&
    2820             :             !(bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
    2821           0 :                 bctl->sys.flags |= BTRFS_BALANCE_ARGS_USAGE;
    2822           0 :                 bctl->sys.usage = 90;
    2823             :         }
    2824           0 :         if (!(bctl->meta.flags & BTRFS_BALANCE_ARGS_USAGE) &&
    2825             :             !(bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
    2826           0 :                 bctl->meta.flags |= BTRFS_BALANCE_ARGS_USAGE;
    2827           0 :                 bctl->meta.usage = 90;
    2828             :         }
    2829           0 : }
    2830             : 
    2831             : /*
    2832             :  * Should be called with both balance and volume mutexes held to
    2833             :  * serialize other volume operations (add_dev/rm_dev/resize) with
    2834             :  * restriper.  Same goes for unset_balance_control.
    2835             :  */
    2836          22 : static void set_balance_control(struct btrfs_balance_control *bctl)
    2837             : {
    2838          22 :         struct btrfs_fs_info *fs_info = bctl->fs_info;
    2839             : 
    2840          22 :         BUG_ON(fs_info->balance_ctl);
    2841             : 
    2842             :         spin_lock(&fs_info->balance_lock);
    2843          22 :         fs_info->balance_ctl = bctl;
    2844             :         spin_unlock(&fs_info->balance_lock);
    2845          22 : }
    2846             : 
    2847          22 : static void unset_balance_control(struct btrfs_fs_info *fs_info)
    2848             : {
    2849          22 :         struct btrfs_balance_control *bctl = fs_info->balance_ctl;
    2850             : 
    2851          22 :         BUG_ON(!fs_info->balance_ctl);
    2852             : 
    2853             :         spin_lock(&fs_info->balance_lock);
    2854          22 :         fs_info->balance_ctl = NULL;
    2855             :         spin_unlock(&fs_info->balance_lock);
    2856             : 
    2857          22 :         kfree(bctl);
    2858          22 : }
    2859             : 
    2860             : /*
    2861             :  * Balance filters.  Return 1 if chunk should be filtered out
    2862             :  * (should not be balanced).
    2863             :  */
    2864             : static int chunk_profiles_filter(u64 chunk_type,
    2865             :                                  struct btrfs_balance_args *bargs)
    2866             : {
    2867           0 :         chunk_type = chunk_to_extended(chunk_type) &
    2868             :                                 BTRFS_EXTENDED_PROFILE_MASK;
    2869             : 
    2870           0 :         if (bargs->profiles & chunk_type)
    2871             :                 return 0;
    2872             : 
    2873             :         return 1;
    2874             : }
    2875             : 
    2876           0 : static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
    2877             :                               struct btrfs_balance_args *bargs)
    2878             : {
    2879             :         struct btrfs_block_group_cache *cache;
    2880             :         u64 chunk_used, user_thresh;
    2881             :         int ret = 1;
    2882             : 
    2883           0 :         cache = btrfs_lookup_block_group(fs_info, chunk_offset);
    2884             :         chunk_used = btrfs_block_group_used(&cache->item);
    2885             : 
    2886           0 :         if (bargs->usage == 0)
    2887             :                 user_thresh = 1;
    2888           0 :         else if (bargs->usage > 100)
    2889           0 :                 user_thresh = cache->key.offset;
    2890             :         else
    2891           0 :                 user_thresh = div_factor_fine(cache->key.offset,
    2892             :                                               bargs->usage);
    2893             : 
    2894           0 :         if (chunk_used < user_thresh)
    2895             :                 ret = 0;
    2896             : 
    2897           0 :         btrfs_put_block_group(cache);
    2898           0 :         return ret;
    2899             : }
    2900             : 
    2901           0 : static int chunk_devid_filter(struct extent_buffer *leaf,
    2902             :                               struct btrfs_chunk *chunk,
    2903             :                               struct btrfs_balance_args *bargs)
    2904             : {
    2905             :         struct btrfs_stripe *stripe;
    2906           0 :         int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
    2907             :         int i;
    2908             : 
    2909           0 :         for (i = 0; i < num_stripes; i++) {
    2910             :                 stripe = btrfs_stripe_nr(chunk, i);
    2911           0 :                 if (btrfs_stripe_devid(leaf, stripe) == bargs->devid)
    2912             :                         return 0;
    2913             :         }
    2914             : 
    2915             :         return 1;
    2916             : }
    2917             : 
    2918             : /* [pstart, pend) */
    2919           0 : static int chunk_drange_filter(struct extent_buffer *leaf,
    2920             :                                struct btrfs_chunk *chunk,
    2921             :                                u64 chunk_offset,
    2922             :                                struct btrfs_balance_args *bargs)
    2923             : {
    2924             :         struct btrfs_stripe *stripe;
    2925           0 :         int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
    2926             :         u64 stripe_offset;
    2927             :         u64 stripe_length;
    2928             :         int factor;
    2929             :         int i;
    2930             : 
    2931           0 :         if (!(bargs->flags & BTRFS_BALANCE_ARGS_DEVID))
    2932             :                 return 0;
    2933             : 
    2934           0 :         if (btrfs_chunk_type(leaf, chunk) & (BTRFS_BLOCK_GROUP_DUP |
    2935             :              BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10)) {
    2936           0 :                 factor = num_stripes / 2;
    2937           0 :         } else if (btrfs_chunk_type(leaf, chunk) & BTRFS_BLOCK_GROUP_RAID5) {
    2938           0 :                 factor = num_stripes - 1;
    2939           0 :         } else if (btrfs_chunk_type(leaf, chunk) & BTRFS_BLOCK_GROUP_RAID6) {
    2940           0 :                 factor = num_stripes - 2;
    2941             :         } else {
    2942             :                 factor = num_stripes;
    2943             :         }
    2944             : 
    2945           0 :         for (i = 0; i < num_stripes; i++) {
    2946             :                 stripe = btrfs_stripe_nr(chunk, i);
    2947           0 :                 if (btrfs_stripe_devid(leaf, stripe) != bargs->devid)
    2948           0 :                         continue;
    2949             : 
    2950             :                 stripe_offset = btrfs_stripe_offset(leaf, stripe);
    2951             :                 stripe_length = btrfs_chunk_length(leaf, chunk);
    2952           0 :                 do_div(stripe_length, factor);
    2953             : 
    2954           0 :                 if (stripe_offset < bargs->pend &&
    2955           0 :                     stripe_offset + stripe_length > bargs->pstart)
    2956             :                         return 0;
    2957             :         }
    2958             : 
    2959             :         return 1;
    2960             : }
    2961             : 
    2962             : /* [vstart, vend) */
    2963           0 : static int chunk_vrange_filter(struct extent_buffer *leaf,
    2964             :                                struct btrfs_chunk *chunk,
    2965             :                                u64 chunk_offset,
    2966             :                                struct btrfs_balance_args *bargs)
    2967             : {
    2968           0 :         if (chunk_offset < bargs->vend &&
    2969           0 :             chunk_offset + btrfs_chunk_length(leaf, chunk) > bargs->vstart)
    2970             :                 /* at least part of the chunk is inside this vrange */
    2971             :                 return 0;
    2972             : 
    2973             :         return 1;
    2974             : }
    2975             : 
    2976             : static int chunk_soft_convert_filter(u64 chunk_type,
    2977             :                                      struct btrfs_balance_args *bargs)
    2978             : {
    2979           0 :         if (!(bargs->flags & BTRFS_BALANCE_ARGS_CONVERT))
    2980             :                 return 0;
    2981             : 
    2982           0 :         chunk_type = chunk_to_extended(chunk_type) &
    2983             :                                 BTRFS_EXTENDED_PROFILE_MASK;
    2984             : 
    2985           0 :         if (bargs->target == chunk_type)
    2986             :                 return 1;
    2987             : 
    2988             :         return 0;
    2989             : }
    2990             : 
    2991         144 : static int should_balance_chunk(struct btrfs_root *root,
    2992             :                                 struct extent_buffer *leaf,
    2993             :                                 struct btrfs_chunk *chunk, u64 chunk_offset)
    2994             : {
    2995         144 :         struct btrfs_balance_control *bctl = root->fs_info->balance_ctl;
    2996             :         struct btrfs_balance_args *bargs = NULL;
    2997             :         u64 chunk_type = btrfs_chunk_type(leaf, chunk);
    2998             : 
    2999             :         /* type filter */
    3000         144 :         if (!((chunk_type & BTRFS_BLOCK_GROUP_TYPE_MASK) &
    3001         144 :               (bctl->flags & BTRFS_BALANCE_TYPE_MASK))) {
    3002             :                 return 0;
    3003             :         }
    3004             : 
    3005         144 :         if (chunk_type & BTRFS_BLOCK_GROUP_DATA)
    3006          44 :                 bargs = &bctl->data;
    3007         100 :         else if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM)
    3008          50 :                 bargs = &bctl->sys;
    3009          50 :         else if (chunk_type & BTRFS_BLOCK_GROUP_METADATA)
    3010          50 :                 bargs = &bctl->meta;
    3011             : 
    3012             :         /* profiles filter */
    3013         144 :         if ((bargs->flags & BTRFS_BALANCE_ARGS_PROFILES) &&
    3014             :             chunk_profiles_filter(chunk_type, bargs)) {
    3015             :                 return 0;
    3016             :         }
    3017             : 
    3018             :         /* usage filter */
    3019         144 :         if ((bargs->flags & BTRFS_BALANCE_ARGS_USAGE) &&
    3020           0 :             chunk_usage_filter(bctl->fs_info, chunk_offset, bargs)) {
    3021             :                 return 0;
    3022             :         }
    3023             : 
    3024             :         /* devid filter */
    3025         144 :         if ((bargs->flags & BTRFS_BALANCE_ARGS_DEVID) &&
    3026           0 :             chunk_devid_filter(leaf, chunk, bargs)) {
    3027             :                 return 0;
    3028             :         }
    3029             : 
    3030             :         /* drange filter, makes sense only with devid filter */
    3031         144 :         if ((bargs->flags & BTRFS_BALANCE_ARGS_DRANGE) &&
    3032           0 :             chunk_drange_filter(leaf, chunk, chunk_offset, bargs)) {
    3033             :                 return 0;
    3034             :         }
    3035             : 
    3036             :         /* vrange filter */
    3037         144 :         if ((bargs->flags & BTRFS_BALANCE_ARGS_VRANGE) &&
    3038           0 :             chunk_vrange_filter(leaf, chunk, chunk_offset, bargs)) {
    3039             :                 return 0;
    3040             :         }
    3041             : 
    3042             :         /* soft profile changing mode */
    3043         144 :         if ((bargs->flags & BTRFS_BALANCE_ARGS_SOFT) &&
    3044             :             chunk_soft_convert_filter(chunk_type, bargs)) {
    3045             :                 return 0;
    3046             :         }
    3047             : 
    3048             :         /*
    3049             :          * limited by count, must be the last filter
    3050             :          */
    3051         144 :         if ((bargs->flags & BTRFS_BALANCE_ARGS_LIMIT)) {
    3052           0 :                 if (bargs->limit == 0)
    3053             :                         return 0;
    3054             :                 else
    3055           0 :                         bargs->limit--;
    3056             :         }
    3057             : 
    3058             :         return 1;
    3059             : }
    3060             : 
    3061          22 : static int __btrfs_balance(struct btrfs_fs_info *fs_info)
    3062             : {
    3063          22 :         struct btrfs_balance_control *bctl = fs_info->balance_ctl;
    3064         238 :         struct btrfs_root *chunk_root = fs_info->chunk_root;
    3065          22 :         struct btrfs_root *dev_root = fs_info->dev_root;
    3066             :         struct list_head *devices;
    3067             :         struct btrfs_device *device;
    3068             :         u64 old_size;
    3069             :         u64 size_to_free;
    3070             :         struct btrfs_chunk *chunk;
    3071             :         struct btrfs_path *path;
    3072             :         struct btrfs_key key;
    3073             :         struct btrfs_key found_key;
    3074             :         struct btrfs_trans_handle *trans;
    3075             :         struct extent_buffer *leaf;
    3076             :         int slot;
    3077             :         int ret;
    3078             :         int enospc_errors = 0;
    3079             :         bool counting = true;
    3080          22 :         u64 limit_data = bctl->data.limit;
    3081          22 :         u64 limit_meta = bctl->meta.limit;
    3082          22 :         u64 limit_sys = bctl->sys.limit;
    3083             : 
    3084             :         /* step one make some room on all the devices */
    3085          22 :         devices = &fs_info->fs_devices->devices;
    3086          44 :         list_for_each_entry(device, devices, dev_list) {
    3087          22 :                 old_size = device->total_bytes;
    3088             :                 size_to_free = div_factor(old_size, 1);
    3089          22 :                 size_to_free = min(size_to_free, (u64)1 * 1024 * 1024);
    3090          44 :                 if (!device->writeable ||
    3091          22 :                     device->total_bytes - device->bytes_used > size_to_free ||
    3092           0 :                     device->is_tgtdev_for_dev_replace)
    3093          22 :                         continue;
    3094             : 
    3095           0 :                 ret = btrfs_shrink_device(device, old_size - size_to_free);
    3096           0 :                 if (ret == -ENOSPC)
    3097             :                         break;
    3098           0 :                 BUG_ON(ret);
    3099             : 
    3100           0 :                 trans = btrfs_start_transaction(dev_root, 0);
    3101           0 :                 BUG_ON(IS_ERR(trans));
    3102             : 
    3103           0 :                 ret = btrfs_grow_device(trans, device, old_size);
    3104           0 :                 BUG_ON(ret);
    3105             : 
    3106           0 :                 btrfs_end_transaction(trans, dev_root);
    3107             :         }
    3108             : 
    3109             :         /* step two, relocate all the chunks */
    3110          22 :         path = btrfs_alloc_path();
    3111          22 :         if (!path) {
    3112             :                 ret = -ENOMEM;
    3113             :                 goto error;
    3114             :         }
    3115             : 
    3116             :         /* zero out stat counters */
    3117             :         spin_lock(&fs_info->balance_lock);
    3118          22 :         memset(&bctl->stat, 0, sizeof(bctl->stat));
    3119             :         spin_unlock(&fs_info->balance_lock);
    3120             : again:
    3121          44 :         if (!counting) {
    3122          22 :                 bctl->data.limit = limit_data;
    3123          22 :                 bctl->meta.limit = limit_meta;
    3124          22 :                 bctl->sys.limit = limit_sys;
    3125             :         }
    3126          44 :         key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
    3127          44 :         key.offset = (u64)-1;
    3128          44 :         key.type = BTRFS_CHUNK_ITEM_KEY;
    3129             : 
    3130             :         while (1) {
    3131         455 :                 if ((!counting && atomic_read(&fs_info->balance_pause_req)) ||
    3132             :                     atomic_read(&fs_info->balance_cancel_req)) {
    3133             :                         ret = -ECANCELED;
    3134             :                         goto error;
    3135             :                 }
    3136             : 
    3137         182 :                 ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
    3138         182 :                 if (ret < 0)
    3139             :                         goto error;
    3140             : 
    3141             :                 /*
    3142             :                  * this shouldn't happen, it means the last relocate
    3143             :                  * failed
    3144             :                  */
    3145         182 :                 if (ret == 0)
    3146           0 :                         BUG(); /* FIXME break ? */
    3147             : 
    3148         182 :                 ret = btrfs_previous_item(chunk_root, path, 0,
    3149             :                                           BTRFS_CHUNK_ITEM_KEY);
    3150         182 :                 if (ret) {
    3151             :                         ret = 0;
    3152             :                         break;
    3153             :                 }
    3154             : 
    3155         144 :                 leaf = path->nodes[0];
    3156         144 :                 slot = path->slots[0];
    3157         144 :                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
    3158             : 
    3159         144 :                 if (found_key.objectid != key.objectid)
    3160             :                         break;
    3161             : 
    3162         144 :                 chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
    3163             : 
    3164         144 :                 if (!counting) {
    3165             :                         spin_lock(&fs_info->balance_lock);
    3166          72 :                         bctl->stat.considered++;
    3167             :                         spin_unlock(&fs_info->balance_lock);
    3168             :                 }
    3169             : 
    3170         288 :                 ret = should_balance_chunk(chunk_root, leaf, chunk,
    3171             :                                            found_key.offset);
    3172         144 :                 btrfs_release_path(path);
    3173         144 :                 if (!ret)
    3174             :                         goto loop;
    3175             : 
    3176         144 :                 if (counting) {
    3177             :                         spin_lock(&fs_info->balance_lock);
    3178          72 :                         bctl->stat.expected++;
    3179             :                         spin_unlock(&fs_info->balance_lock);
    3180             :                         goto loop;
    3181             :                 }
    3182             : 
    3183         144 :                 ret = btrfs_relocate_chunk(chunk_root,
    3184             :                                            chunk_root->root_key.objectid,
    3185             :                                            found_key.objectid,
    3186             :                                            found_key.offset);
    3187          72 :                 if (ret && ret != -ENOSPC)
    3188             :                         goto error;
    3189          72 :                 if (ret == -ENOSPC) {
    3190           0 :                         enospc_errors++;
    3191             :                 } else {
    3192             :                         spin_lock(&fs_info->balance_lock);
    3193          72 :                         bctl->stat.completed++;
    3194             :                         spin_unlock(&fs_info->balance_lock);
    3195             :                 }
    3196             : loop:
    3197         144 :                 if (found_key.offset == 0)
    3198             :                         break;
    3199         138 :                 key.offset = found_key.offset - 1;
    3200         138 :         }
    3201             : 
    3202          44 :         if (counting) {
    3203          22 :                 btrfs_release_path(path);
    3204             :                 counting = false;
    3205          22 :                 goto again;
    3206             :         }
    3207             : error:
    3208          22 :         btrfs_free_path(path);
    3209          22 :         if (enospc_errors) {
    3210           0 :                 btrfs_info(fs_info, "%d enospc errors during balance",
    3211             :                        enospc_errors);
    3212           0 :                 if (!ret)
    3213             :                         ret = -ENOSPC;
    3214             :         }
    3215             : 
    3216          22 :         return ret;
    3217             : }
    3218             : 
    3219             : /**
    3220             :  * alloc_profile_is_valid - see if a given profile is valid and reduced
    3221             :  * @flags: profile to validate
    3222             :  * @extended: if true @flags is treated as an extended profile
    3223             :  */
    3224             : static int alloc_profile_is_valid(u64 flags, int extended)
    3225             : {
    3226             :         u64 mask = (extended ? BTRFS_EXTENDED_PROFILE_MASK :
    3227             :                                BTRFS_BLOCK_GROUP_PROFILE_MASK);
    3228             : 
    3229          87 :         flags &= ~BTRFS_BLOCK_GROUP_TYPE_MASK;
    3230             : 
    3231             :         /* 1) check that all other bits are zeroed */
    3232          87 :         if (flags & ~mask)
    3233             :                 return 0;
    3234             : 
    3235             :         /* 2) see if profile is reduced */
    3236          87 :         if (flags == 0)
    3237             :                 return !extended; /* "0" is valid for usual profiles */
    3238             : 
    3239             :         /* true if exactly one bit set */
    3240          46 :         return (flags & (flags - 1)) == 0;
    3241             : }
    3242             : 
    3243             : static inline int balance_need_close(struct btrfs_fs_info *fs_info)
    3244             : {
    3245             :         /* cancel requested || normal exit path */
    3246          44 :         return atomic_read(&fs_info->balance_cancel_req) ||
    3247          22 :                 (atomic_read(&fs_info->balance_pause_req) == 0 &&
    3248             :                  atomic_read(&fs_info->balance_cancel_req) == 0);
    3249             : }
    3250             : 
    3251          22 : static void __cancel_balance(struct btrfs_fs_info *fs_info)
    3252             : {
    3253             :         int ret;
    3254             : 
    3255          22 :         unset_balance_control(fs_info);
    3256          22 :         ret = del_balance_item(fs_info->tree_root);
    3257          22 :         if (ret)
    3258           0 :                 btrfs_std_error(fs_info, ret);
    3259             : 
    3260             :         atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
    3261          22 : }
    3262             : 
    3263             : /*
    3264             :  * Should be called with both balance and volume mutexes held
    3265             :  */
    3266          22 : int btrfs_balance(struct btrfs_balance_control *bctl,
    3267             :                   struct btrfs_ioctl_balance_args *bargs)
    3268             : {
    3269          22 :         struct btrfs_fs_info *fs_info = bctl->fs_info;
    3270             :         u64 allowed;
    3271             :         int mixed = 0;
    3272             :         int ret;
    3273             :         u64 num_devices;
    3274             :         unsigned seq;
    3275             : 
    3276          44 :         if (btrfs_fs_closing(fs_info) ||
    3277          22 :             atomic_read(&fs_info->balance_pause_req) ||
    3278             :             atomic_read(&fs_info->balance_cancel_req)) {
    3279             :                 ret = -EINVAL;
    3280             :                 goto out;
    3281             :         }
    3282             : 
    3283          22 :         allowed = btrfs_super_incompat_flags(fs_info->super_copy);
    3284          22 :         if (allowed & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
    3285             :                 mixed = 1;
    3286             : 
    3287             :         /*
    3288             :          * In case of mixed groups both data and meta should be picked,
    3289             :          * and identical options should be given for both of them.
    3290             :          */
    3291             :         allowed = BTRFS_BALANCE_DATA | BTRFS_BALANCE_METADATA;
    3292          22 :         if (mixed && (bctl->flags & allowed)) {
    3293           0 :                 if (!(bctl->flags & BTRFS_BALANCE_DATA) ||
    3294           0 :                     !(bctl->flags & BTRFS_BALANCE_METADATA) ||
    3295           0 :                     memcmp(&bctl->data, &bctl->meta, sizeof(bctl->data))) {
    3296           0 :                         btrfs_err(fs_info, "with mixed groups data and "
    3297             :                                    "metadata balance options must be the same");
    3298             :                         ret = -EINVAL;
    3299           0 :                         goto out;
    3300             :                 }
    3301             :         }
    3302             : 
    3303          22 :         num_devices = fs_info->fs_devices->num_devices;
    3304          22 :         btrfs_dev_replace_lock(&fs_info->dev_replace);
    3305          22 :         if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
    3306           0 :                 BUG_ON(num_devices < 1);
    3307           0 :                 num_devices--;
    3308             :         }
    3309          22 :         btrfs_dev_replace_unlock(&fs_info->dev_replace);
    3310             :         allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
    3311          22 :         if (num_devices == 1)
    3312             :                 allowed |= BTRFS_BLOCK_GROUP_DUP;
    3313           0 :         else if (num_devices > 1)
    3314             :                 allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1);
    3315          22 :         if (num_devices > 2)
    3316           0 :                 allowed |= BTRFS_BLOCK_GROUP_RAID5;
    3317          22 :         if (num_devices > 3)
    3318           0 :                 allowed |= (BTRFS_BLOCK_GROUP_RAID10 |
    3319             :                             BTRFS_BLOCK_GROUP_RAID6);
    3320          22 :         if ((bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
    3321           0 :             (!alloc_profile_is_valid(bctl->data.target, 1) ||
    3322           0 :              (bctl->data.target & ~allowed))) {
    3323           0 :                 btrfs_err(fs_info, "unable to start balance with target "
    3324             :                            "data profile %llu",
    3325             :                        bctl->data.target);
    3326             :                 ret = -EINVAL;
    3327           0 :                 goto out;
    3328             :         }
    3329          22 :         if ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
    3330           0 :             (!alloc_profile_is_valid(bctl->meta.target, 1) ||
    3331           0 :              (bctl->meta.target & ~allowed))) {
    3332           0 :                 btrfs_err(fs_info,
    3333             :                            "unable to start balance with target metadata profile %llu",
    3334             :                        bctl->meta.target);
    3335             :                 ret = -EINVAL;
    3336           0 :                 goto out;
    3337             :         }
    3338          22 :         if ((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
    3339           0 :             (!alloc_profile_is_valid(bctl->sys.target, 1) ||
    3340           0 :              (bctl->sys.target & ~allowed))) {
    3341           0 :                 btrfs_err(fs_info,
    3342             :                            "unable to start balance with target system profile %llu",
    3343             :                        bctl->sys.target);
    3344             :                 ret = -EINVAL;
    3345           0 :                 goto out;
    3346             :         }
    3347             : 
    3348             :         /* allow dup'ed data chunks only in mixed mode */
    3349          22 :         if (!mixed && (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
    3350           0 :             (bctl->data.target & BTRFS_BLOCK_GROUP_DUP)) {
    3351           0 :                 btrfs_err(fs_info, "dup for data is not allowed");
    3352             :                 ret = -EINVAL;
    3353           0 :                 goto out;
    3354             :         }
    3355             : 
    3356             :         /* allow to reduce meta or sys integrity only if force set */
    3357             :         allowed = BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
    3358             :                         BTRFS_BLOCK_GROUP_RAID10 |
    3359             :                         BTRFS_BLOCK_GROUP_RAID5 |
    3360             :                         BTRFS_BLOCK_GROUP_RAID6;
    3361             :         do {
    3362             :                 seq = read_seqbegin(&fs_info->profiles_lock);
    3363             : 
    3364          22 :                 if (((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
    3365           0 :                      (fs_info->avail_system_alloc_bits & allowed) &&
    3366          22 :                      !(bctl->sys.target & allowed)) ||
    3367          22 :                     ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
    3368           0 :                      (fs_info->avail_metadata_alloc_bits & allowed) &&
    3369           0 :                      !(bctl->meta.target & allowed))) {
    3370           0 :                         if (bctl->flags & BTRFS_BALANCE_FORCE) {
    3371           0 :                                 btrfs_info(fs_info, "force reducing metadata integrity");
    3372             :                         } else {
    3373           0 :                                 btrfs_err(fs_info, "balance will reduce metadata "
    3374             :                                            "integrity, use force if you want this");
    3375             :                                 ret = -EINVAL;
    3376           0 :                                 goto out;
    3377             :                         }
    3378             :                 }
    3379          22 :         } while (read_seqretry(&fs_info->profiles_lock, seq));
    3380             : 
    3381          22 :         if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
    3382             :                 int num_tolerated_disk_barrier_failures;
    3383           0 :                 u64 target = bctl->sys.target;
    3384             : 
    3385           0 :                 num_tolerated_disk_barrier_failures =
    3386             :                         btrfs_calc_num_tolerated_disk_barrier_failures(fs_info);
    3387           0 :                 if (num_tolerated_disk_barrier_failures > 0 &&
    3388           0 :                     (target &
    3389             :                      (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID0 |
    3390             :                       BTRFS_AVAIL_ALLOC_BIT_SINGLE)))
    3391             :                         num_tolerated_disk_barrier_failures = 0;
    3392           0 :                 else if (num_tolerated_disk_barrier_failures > 1 &&
    3393           0 :                          (target &
    3394             :                           (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10)))
    3395             :                         num_tolerated_disk_barrier_failures = 1;
    3396             : 
    3397           0 :                 fs_info->num_tolerated_disk_barrier_failures =
    3398             :                         num_tolerated_disk_barrier_failures;
    3399             :         }
    3400             : 
    3401          22 :         ret = insert_balance_item(fs_info->tree_root, bctl);
    3402          22 :         if (ret && ret != -EEXIST)
    3403             :                 goto out;
    3404             : 
    3405          22 :         if (!(bctl->flags & BTRFS_BALANCE_RESUME)) {
    3406          22 :                 BUG_ON(ret == -EEXIST);
    3407          22 :                 set_balance_control(bctl);
    3408             :         } else {
    3409           0 :                 BUG_ON(ret != -EEXIST);
    3410             :                 spin_lock(&fs_info->balance_lock);
    3411           0 :                 update_balance_args(bctl);
    3412             :                 spin_unlock(&fs_info->balance_lock);
    3413             :         }
    3414             : 
    3415          22 :         atomic_inc(&fs_info->balance_running);
    3416          22 :         mutex_unlock(&fs_info->balance_mutex);
    3417             : 
    3418          22 :         ret = __btrfs_balance(fs_info);
    3419             : 
    3420          22 :         mutex_lock(&fs_info->balance_mutex);
    3421             :         atomic_dec(&fs_info->balance_running);
    3422             : 
    3423          22 :         if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
    3424           0 :                 fs_info->num_tolerated_disk_barrier_failures =
    3425           0 :                         btrfs_calc_num_tolerated_disk_barrier_failures(fs_info);
    3426             :         }
    3427             : 
    3428          22 :         if (bargs) {
    3429          22 :                 memset(bargs, 0, sizeof(*bargs));
    3430          22 :                 update_ioctl_balance_args(fs_info, 0, bargs);
    3431             :         }
    3432             : 
    3433          44 :         if ((ret && ret != -ECANCELED && ret != -ENOSPC) ||
    3434             :             balance_need_close(fs_info)) {
    3435          22 :                 __cancel_balance(fs_info);
    3436             :         }
    3437             : 
    3438          22 :         wake_up(&fs_info->balance_wait_q);
    3439             : 
    3440          22 :         return ret;
    3441             : out:
    3442           0 :         if (bctl->flags & BTRFS_BALANCE_RESUME)
    3443           0 :                 __cancel_balance(fs_info);
    3444             :         else {
    3445           0 :                 kfree(bctl);
    3446             :                 atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
    3447             :         }
    3448           0 :         return ret;
    3449             : }
    3450             : 
    3451           0 : static int balance_kthread(void *data)
    3452             : {
                               /*
                                * Thread function passed to kthread_run() by
                                * btrfs_resume_balance_async(); @data is the struct btrfs_fs_info.
                                * Returns the result of btrfs_balance(), or 0 when no balance
                                * control is installed.
                                */
    3453             :         struct btrfs_fs_info *fs_info = data;
    3454             :         int ret = 0;
    3455             : 
                               /* lock order: volume_mutex first, then balance_mutex */
    3456           0 :         mutex_lock(&fs_info->volume_mutex);
    3457           0 :         mutex_lock(&fs_info->balance_mutex);
    3458             : 
                               /* only resume if a balance control is still present */
    3459           0 :         if (fs_info->balance_ctl) {
    3460           0 :                 btrfs_info(fs_info, "continuing balance");
                                       /* NULL bargs: there is no ioctl caller to report progress to */
    3461           0 :                 ret = btrfs_balance(fs_info->balance_ctl, NULL);
    3462             :         }
    3463             : 
    3464           0 :         mutex_unlock(&fs_info->balance_mutex);
    3465           0 :         mutex_unlock(&fs_info->volume_mutex);
    3466             : 
    3467           0 :         return ret;
    3468             : }
    3469             : 
    3470         194 : int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
    3471             : {
                               /*
                                * Start a "btrfs-balance" kernel thread running balance_kthread()
                                * when a balance control is installed in @fs_info.
                                *
                                * Returns 0 when there is nothing to resume, when the SKIP_BALANCE
                                * option is set, or when the thread was started; otherwise the
                                * error encoded in the kthread_run() result.
                                */
    3472             :         struct task_struct *tsk;
    3473             : 
                               /* balance_ctl is only peeked at here, under the balance_lock spinlock */
    3474             :         spin_lock(&fs_info->balance_lock);
    3475         194 :         if (!fs_info->balance_ctl) {
    3476             :                 spin_unlock(&fs_info->balance_lock);
    3477         194 :                 return 0;
    3478             :         }
    3479             :         spin_unlock(&fs_info->balance_lock);
    3480             : 
                               /* balance_ctl is left in place; the balance is just not restarted */
    3481           0 :         if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) {
    3482           0 :                 btrfs_info(fs_info, "force skipping balance");
    3483           0 :                 return 0;
    3484             :         }
    3485             : 
    3486           0 :         tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
    3487           0 :         return PTR_ERR_OR_ZERO(tsk);
    3488             : }
    3489             : 
    3490         221 : int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
    3491             : {
                               /*
                                * Look for a balance item in the tree root and, if one exists,
                                * rebuild a struct btrfs_balance_control from it and install it
                                * with set_balance_control().  The balance itself is not started
                                * here.
                                *
                                * Returns 0 when no item exists or the control was installed,
                                * -ENOMEM on allocation failure, or the negative error returned
                                * by btrfs_search_slot().
                                */
    3492             :         struct btrfs_balance_control *bctl;
    3493             :         struct btrfs_balance_item *item;
    3494             :         struct btrfs_disk_balance_args disk_bargs;
    3495             :         struct btrfs_path *path;
    3496             :         struct extent_buffer *leaf;
    3497             :         struct btrfs_key key;
    3498             :         int ret;
    3499             : 
    3500         221 :         path = btrfs_alloc_path();
    3501         221 :         if (!path)
    3502             :                 return -ENOMEM;
    3503             : 
    3504         221 :         key.objectid = BTRFS_BALANCE_OBJECTID;
    3505         221 :         key.type = BTRFS_BALANCE_ITEM_KEY;
    3506         221 :         key.offset = 0;
    3507             : 
                               /* read-only lookup: no transaction handle, ins_len 0, cow 0 */
    3508         221 :         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
    3509         221 :         if (ret < 0)
    3510             :                 goto out;
                               /* a positive result is treated as "no balance item": not an error */
    3511         221 :         if (ret > 0) { /* ret = -ENOENT; */
    3512             :                 ret = 0;
    3513             :                 goto out;
    3514             :         }
    3515             : 
    3516           0 :         bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
    3517           0 :         if (!bctl) {
    3518             :                 ret = -ENOMEM;
    3519             :                 goto out;
    3520             :         }
    3521             : 
    3522           0 :         leaf = path->nodes[0];
    3523           0 :         item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);
    3524             : 
    3525           0 :         bctl->fs_info = fs_info;
    3526           0 :         bctl->flags = btrfs_balance_flags(leaf, item);
                               /* mark the control as a resume; btrfs_balance() tests this flag */
    3527           0 :         bctl->flags |= BTRFS_BALANCE_RESUME;
    3528             : 
                               /* disk_bargs is reused as scratch space for data, meta and sys args */
    3529             :         btrfs_balance_data(leaf, item, &disk_bargs);
    3530           0 :         btrfs_disk_balance_args_to_cpu(&bctl->data, &disk_bargs);
    3531             :         btrfs_balance_meta(leaf, item, &disk_bargs);
    3532           0 :         btrfs_disk_balance_args_to_cpu(&bctl->meta, &disk_bargs);
    3533             :         btrfs_balance_sys(leaf, item, &disk_bargs);
    3534           0 :         btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);
    3535             : 
                               /* set the exclusive-operation flag; warn if it was already non-zero */
    3536           0 :         WARN_ON(atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1));
    3537             : 
    3538           0 :         mutex_lock(&fs_info->volume_mutex);
    3539           0 :         mutex_lock(&fs_info->balance_mutex);
    3540             : 
    3541           0 :         set_balance_control(bctl);
    3542             : 
    3543           0 :         mutex_unlock(&fs_info->balance_mutex);
    3544           0 :         mutex_unlock(&fs_info->volume_mutex);
                               /* ret is still 0 here, from the successful btrfs_search_slot() */
    3545             : out:
    3546         221 :         btrfs_free_path(path);
    3547         221 :         return ret;
    3548             : }
    3549             : 
    3550         222 : int btrfs_pause_balance(struct btrfs_fs_info *fs_info)
    3551             : {
                               /*
                                * Request a pause of the running balance and wait until
                                * balance_running drops to zero.
                                *
                                * Returns 0 after a running balance has stopped, or -ENOTCONN
                                * when no balance control is installed or the balance is not
                                * currently running.
                                */
    3552             :         int ret = 0;
    3553             : 
    3554         222 :         mutex_lock(&fs_info->balance_mutex);
    3555         222 :         if (!fs_info->balance_ctl) {
    3556         222 :                 mutex_unlock(&fs_info->balance_mutex);
    3557         222 :                 return -ENOTCONN;
    3558             :         }
    3559             : 
    3560           0 :         if (atomic_read(&fs_info->balance_running)) {
    3561           0 :                 atomic_inc(&fs_info->balance_pause_req);
                                       /* drop the mutex while sleeping; it is re-taken after the wait */
    3562           0 :                 mutex_unlock(&fs_info->balance_mutex);
    3563             : 
                                       /* btrfs_balance() wakes balance_wait_q after decrementing balance_running */
    3564           0 :                 wait_event(fs_info->balance_wait_q,
    3565             :                            atomic_read(&fs_info->balance_running) == 0);
    3566             : 
    3567           0 :                 mutex_lock(&fs_info->balance_mutex);
    3568             :                 /* we are good with balance_ctl ripped off from under us */
    3569           0 :                 BUG_ON(atomic_read(&fs_info->balance_running));
    3570             :                 atomic_dec(&fs_info->balance_pause_req);
    3571             :         } else {
                                       /* a control exists but nothing is running: nothing to pause */
    3572             :                 ret = -ENOTCONN;
    3573             :         }
    3574             : 
    3575           0 :         mutex_unlock(&fs_info->balance_mutex);
    3576           0 :         return ret;
    3577             : }
    3578             : 
    3579           0 : int btrfs_cancel_balance(struct btrfs_fs_info *fs_info)
    3580             : {
                               /*
                                * Cancel the current balance, whether it is running or only
                                * installed (e.g. paused).
                                *
                                * Returns -EROFS when the superblock has MS_RDONLY set, -ENOTCONN
                                * when no balance control is installed, and 0 once balance_ctl is
                                * gone and balance_running is zero.
                                */
    3581           0 :         if (fs_info->sb->s_flags & MS_RDONLY)
    3582             :                 return -EROFS;
    3583             : 
    3584           0 :         mutex_lock(&fs_info->balance_mutex);
    3585           0 :         if (!fs_info->balance_ctl) {
    3586           0 :                 mutex_unlock(&fs_info->balance_mutex);
    3587           0 :                 return -ENOTCONN;
    3588             :         }
    3589             : 
    3590           0 :         atomic_inc(&fs_info->balance_cancel_req);
    3591             :         /*
    3592             :          * if we are running just wait and return, balance item is
    3593             :          * deleted in btrfs_balance in this case
    3594             :          */
    3595           0 :         if (atomic_read(&fs_info->balance_running)) {
    3596           0 :                 mutex_unlock(&fs_info->balance_mutex);
    3597           0 :                 wait_event(fs_info->balance_wait_q,
    3598             :                            atomic_read(&fs_info->balance_running) == 0);
    3599           0 :                 mutex_lock(&fs_info->balance_mutex);
    3600             :         } else {
    3601             :                 /* __cancel_balance needs volume_mutex */
                                       /* re-acquire in the order volume_mutex, then balance_mutex */
    3602           0 :                 mutex_unlock(&fs_info->balance_mutex);
    3603           0 :                 mutex_lock(&fs_info->volume_mutex);
    3604           0 :                 mutex_lock(&fs_info->balance_mutex);
    3605             : 
                                       /* re-checked because balance_mutex was dropped above */
    3606           0 :                 if (fs_info->balance_ctl)
    3607           0 :                         __cancel_balance(fs_info);
    3608             : 
    3609           0 :                 mutex_unlock(&fs_info->volume_mutex);
    3610             :         }
    3611             : 
                               /* both branches reach this point holding balance_mutex */
    3612           0 :         BUG_ON(fs_info->balance_ctl || atomic_read(&fs_info->balance_running));
    3613             :         atomic_dec(&fs_info->balance_cancel_req);
    3614           0 :         mutex_unlock(&fs_info->balance_mutex);
    3615           0 :         return 0;
    3616             : }
    3617             : 
    3618          99 : static int btrfs_uuid_scan_kthread(void *data)
    3619             : {
                               /*
                                * Walk the root items in fs_info->tree_root and, for every root
                                * with a non-empty uuid or received_uuid, add the matching
                                * entries to fs_info->uuid_root via btrfs_uuid_tree_add().
                                *
                                * @data is the struct btrfs_fs_info.  On success
                                * update_uuid_tree_gen is set to 1; on failure a warning is
                                * logged.  In both cases uuid_tree_rescan_sem is released and
                                * the function returns 0.
                                */
    3620             :         struct btrfs_fs_info *fs_info = data;
    3621          99 :         struct btrfs_root *root = fs_info->tree_root;
    3622             :         struct btrfs_key key;
    3623             :         struct btrfs_key max_key;
    3624             :         struct btrfs_path *path = NULL;
    3625             :         int ret = 0;
    3626             :         struct extent_buffer *eb;
    3627             :         int slot;
    3628             :         struct btrfs_root_item root_item;
    3629             :         u32 item_size;
    3630             :         struct btrfs_trans_handle *trans = NULL;
    3631             : 
    3632          99 :         path = btrfs_alloc_path();
    3633          99 :         if (!path) {
    3634             :                 ret = -ENOMEM;
    3635             :                 goto out;
    3636             :         }
    3637             : 
    3638          99 :         key.objectid = 0;
    3639          99 :         key.type = BTRFS_ROOT_ITEM_KEY;
    3640          99 :         key.offset = 0;
    3641             : 
                               /* NOTE(review): max_key is initialised but not passed to any call below */
    3642             :         max_key.objectid = (u64)-1;
    3643             :         max_key.type = BTRFS_ROOT_ITEM_KEY;
    3644             :         max_key.offset = (u64)-1;
    3645             : 
    3646          99 :         path->keep_locks = 1;
    3647             : 
    3648             :         while (1) {
    3649        1433 :                 ret = btrfs_search_forward(root, &key, path, 0);
    3650        1433 :                 if (ret) {
                                               /* a positive result ends the scan without an error */
    3651          99 :                         if (ret > 0)
    3652             :                                 ret = 0;
    3653             :                         break;
    3654             :                 }
    3655             : 
                                       /*
                                        * Only root items whose objectid is BTRFS_FS_TREE_OBJECTID or
                                        * lies in [BTRFS_FIRST_FREE_OBJECTID, BTRFS_LAST_FREE_OBJECTID]
                                        * are examined; everything else is skipped.
                                        */
    3656        1929 :                 if (key.type != BTRFS_ROOT_ITEM_KEY ||
    3657         595 :                     (key.objectid < BTRFS_FIRST_FREE_OBJECTID &&
    3658         199 :                      key.objectid != BTRFS_FS_TREE_OBJECTID) ||
    3659             :                     key.objectid > BTRFS_LAST_FREE_OBJECTID)
    3660             :                         goto skip;
    3661             : 
    3662         100 :                 eb = path->nodes[0];
    3663         100 :                 slot = path->slots[0];
    3664             :                 item_size = btrfs_item_size_nr(eb, slot);
                                       /* items smaller than struct btrfs_root_item are skipped */
    3665         100 :                 if (item_size < sizeof(root_item))
    3666             :                         goto skip;
    3667             : 
    3668         100 :                 read_extent_buffer(eb, &root_item,
    3669             :                                    btrfs_item_ptr_offset(eb, slot),
    3670             :                                    (int)sizeof(root_item));
                                       /* roots with a zero reference count are skipped */
    3671         100 :                 if (btrfs_root_refs(&root_item) == 0)
    3672             :                         goto skip;
    3673             : 
    3674         200 :                 if (!btrfs_is_empty_uuid(root_item.uuid) ||
    3675         100 :                     !btrfs_is_empty_uuid(root_item.received_uuid)) {
                                               /* second pass for this key: the transaction already exists */
    3676           0 :                         if (trans)
    3677             :                                 goto update_tree;
    3678             : 
                                               /*
                                                * First pass for this key: release the path, start a
                                                * transaction and "continue" so the same key is searched
                                                * again, this time with trans set.
                                                */
    3679           0 :                         btrfs_release_path(path);
    3680             :                         /*
    3681             :                          * 1 - subvol uuid item
    3682             :                          * 1 - received_subvol uuid item
    3683             :                          */
    3684           0 :                         trans = btrfs_start_transaction(fs_info->uuid_root, 2);
    3685           0 :                         if (IS_ERR(trans)) {
    3686           0 :                                 ret = PTR_ERR(trans);
    3687           0 :                                 break;
    3688             :                         }
    3689           0 :                         continue;
    3690             :                 } else {
    3691             :                         goto skip;
    3692             :                 }
    3693             : update_tree:
    3694           0 :                 if (!btrfs_is_empty_uuid(root_item.uuid)) {
    3695           0 :                         ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root,
    3696             :                                                   root_item.uuid,
    3697             :                                                   BTRFS_UUID_KEY_SUBVOL,
    3698             :                                                   key.objectid);
    3699           0 :                         if (ret < 0) {
    3700           0 :                                 btrfs_warn(fs_info, "uuid_tree_add failed %d",
    3701             :                                         ret);
    3702           0 :                                 break;
    3703             :                         }
    3704             :                 }
    3705             : 
    3706           0 :                 if (!btrfs_is_empty_uuid(root_item.received_uuid)) {
    3707           0 :                         ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root,
    3708             :                                                   root_item.received_uuid,
    3709             :                                                  BTRFS_UUID_KEY_RECEIVED_SUBVOL,
    3710             :                                                   key.objectid);
    3711           0 :                         if (ret < 0) {
    3712           0 :                                 btrfs_warn(fs_info, "uuid_tree_add failed %d",
    3713             :                                         ret);
    3714           0 :                                 break;
    3715             :                         }
    3716             :                 }
    3717             : 
    3718             : skip:
                                       /* end the per-item transaction, if one was started for this key */
    3719        1334 :                 if (trans) {
    3720           0 :                         ret = btrfs_end_transaction(trans, fs_info->uuid_root);
    3721             :                         trans = NULL;
    3722           0 :                         if (ret)
    3723             :                                 break;
    3724             :                 }
    3725             : 
    3726        1334 :                 btrfs_release_path(path);
                                       /*
                                        * Advance the search key past the current item: bump offset,
                                        * or if that is already (u64)-1 move on by type, then by
                                        * objectid; stop once all three fields are exhausted.
                                        */
    3727        1334 :                 if (key.offset < (u64)-1) {
    3728        1334 :                         key.offset++;
    3729           0 :                 } else if (key.type < BTRFS_ROOT_ITEM_KEY) {
    3730           0 :                         key.offset = 0;
    3731           0 :                         key.type = BTRFS_ROOT_ITEM_KEY;
    3732           0 :                 } else if (key.objectid < (u64)-1) {
    3733           0 :                         key.offset = 0;
    3734           0 :                         key.type = BTRFS_ROOT_ITEM_KEY;
    3735           0 :                         key.objectid++;
    3736             :                 } else {
    3737             :                         break;
    3738             :                 }
    3739        1334 :                 cond_resched();
    3740             :         }
    3741             : 
    3742             : out:
    3743          99 :         btrfs_free_path(path);
                               /* a break out of update_tree leaves a live transaction to end here */
    3744          99 :         if (trans && !IS_ERR(trans))
    3745           0 :                 btrfs_end_transaction(trans, fs_info->uuid_root);
    3746          99 :         if (ret)
    3747           0 :                 btrfs_warn(fs_info, "btrfs_uuid_scan_kthread failed %d", ret);
    3748             :         else
    3749          99 :                 fs_info->update_uuid_tree_gen = 1;
                               /* releases the semaphore taken by the function that started this scan */
    3750          99 :         up(&fs_info->uuid_tree_rescan_sem);
                               /* ret is only logged above; the thread function itself returns 0 */
    3751          99 :         return 0;
    3752             : }
    3753             : 
    3754             : /*
    3755             :  * Callback for btrfs_uuid_tree_iterate().
    3756             :  * returns:
    3757             :  * 0    check succeeded, the entry is not outdated.
    3758             :  * < 0       if an error occurred.
    3759             :  * > 0       if the check failed, which means the caller shall remove the entry.
    3760             :  */
    3761           0 : static int btrfs_check_uuid_tree_entry(struct btrfs_fs_info *fs_info,
    3762             :                                        u8 *uuid, u8 type, u64 subid)
    3763             : {
    3764             :         struct btrfs_key key;
    3765             :         int ret = 0;
    3766             :         struct btrfs_root *subvol_root;
    3767             : 
                               /* entry types other than the two subvol kinds are not checked: return 0 */
    3768           0 :         if (type != BTRFS_UUID_KEY_SUBVOL &&
    3769             :             type != BTRFS_UUID_KEY_RECEIVED_SUBVOL)
    3770             :                 goto out;
    3771             : 
                               /* look up the root whose objectid is the subid stored in the entry */
    3772           0 :         key.objectid = subid;
    3773           0 :         key.type = BTRFS_ROOT_ITEM_KEY;
    3774           0 :         key.offset = (u64)-1;
    3775             :         subvol_root = btrfs_read_fs_root_no_name(fs_info, &key);
    3776           0 :         if (IS_ERR(subvol_root)) {
    3777           0 :                 ret = PTR_ERR(subvol_root);
                                       /* -ENOENT is reported as 1 (remove the entry); other errors pass through */
    3778           0 :                 if (ret == -ENOENT)
    3779             :                         ret = 1;
    3780             :                 goto out;
    3781             :         }
    3782             : 
                               /* the entry is outdated when its uuid differs from the one in the root item */
    3783           0 :         switch (type) {
    3784             :         case BTRFS_UUID_KEY_SUBVOL:
    3785           0 :                 if (memcmp(uuid, subvol_root->root_item.uuid, BTRFS_UUID_SIZE))
    3786             :                         ret = 1;
    3787             :                 break;
    3788             :         case BTRFS_UUID_KEY_RECEIVED_SUBVOL:
    3789           0 :                 if (memcmp(uuid, subvol_root->root_item.received_uuid,
    3790             :                            BTRFS_UUID_SIZE))
    3791             :                         ret = 1;
    3792             :                 break;
    3793             :         }
    3794             : 
    3795             : out:
    3796           0 :         return ret;
    3797             : }
    3798             : 
    3799           0 : static int btrfs_uuid_rescan_kthread(void *data)
    3800             : {
                               /*
                                * Thread function passed to kthread_run() by
                                * btrfs_check_uuid_tree(); @data is the struct btrfs_fs_info.
                                * Returns the negative error from btrfs_uuid_tree_iterate(), or
                                * otherwise the result of btrfs_uuid_scan_kthread().
                                */
    3801             :         struct btrfs_fs_info *fs_info = (struct btrfs_fs_info *)data;
    3802             :         int ret;
    3803             : 
    3804             :         /*
    3805             :          * 1st step is to iterate through the existing UUID tree and
    3806             :          * to delete all entries that contain outdated data.
    3807             :          * 2nd step is to add all missing entries to the UUID tree.
    3808             :          */
    3809           0 :         ret = btrfs_uuid_tree_iterate(fs_info, btrfs_check_uuid_tree_entry);
    3810           0 :         if (ret < 0) {
    3811           0 :                 btrfs_warn(fs_info, "iterating uuid_tree failed %d", ret);
                                       /* the 2nd step is skipped, so the semaphore must be released here */
    3812           0 :                 up(&fs_info->uuid_tree_rescan_sem);
    3813           0 :                 return ret;
    3814             :         }
                               /* btrfs_uuid_scan_kthread() releases uuid_tree_rescan_sem itself */
    3815           0 :         return btrfs_uuid_scan_kthread(data);
    3816             : }
    3817             : 
    3818          99 : int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info)
    3819             : {
                               /*
                                * Create the UUID tree (BTRFS_UUID_TREE_OBJECTID), commit the
                                * creating transaction, and start a "btrfs-uuid" kernel thread
                                * running btrfs_uuid_scan_kthread() to populate it.
                                *
                                * Returns 0 once the scan thread has been started, or a negative
                                * error from starting/committing the transaction, creating the
                                * tree, or kthread_run().
                                */
    3820             :         struct btrfs_trans_handle *trans;
    3821          99 :         struct btrfs_root *tree_root = fs_info->tree_root;
    3822             :         struct btrfs_root *uuid_root;
    3823             :         struct task_struct *task;
    3824             :         int ret;
    3825             : 
    3826             :         /*
    3827             :          * 1 - root node
    3828             :          * 1 - root item
    3829             :          */
    3830          99 :         trans = btrfs_start_transaction(tree_root, 2);
    3831          99 :         if (IS_ERR(trans))
    3832           0 :                 return PTR_ERR(trans);
    3833             : 
    3834          99 :         uuid_root = btrfs_create_tree(trans, fs_info,
    3835             :                                       BTRFS_UUID_TREE_OBJECTID);
    3836          99 :         if (IS_ERR(uuid_root)) {
                                       /*
                                        * NOTE(review): this path returns without ending or
                                        * committing trans -- confirm whether a
                                        * btrfs_end_transaction() is needed after the abort.
                                        */
    3837           0 :                 btrfs_abort_transaction(trans, tree_root,
    3838             :                                         PTR_ERR(uuid_root));
    3839           0 :                 return PTR_ERR(uuid_root);
    3840             :         }
    3841             : 
    3842          99 :         fs_info->uuid_root = uuid_root;
    3843             : 
    3844          99 :         ret = btrfs_commit_transaction(trans, tree_root);
    3845          99 :         if (ret)
    3846             :                 return ret;
    3847             : 
                               /* taken here; released by the scan thread when it finishes */
    3848          99 :         down(&fs_info->uuid_tree_rescan_sem);
    3849         198 :         task = kthread_run(btrfs_uuid_scan_kthread, fs_info, "btrfs-uuid");
    3850          99 :         if (IS_ERR(task)) {
    3851             :                 /* fs_info->update_uuid_tree_gen remains 0 in all error case */
    3852           0 :                 btrfs_warn(fs_info, "failed to start uuid_scan task");
                                       /* no thread will run, so release the semaphore ourselves */
    3853           0 :                 up(&fs_info->uuid_tree_rescan_sem);
    3854           0 :                 return PTR_ERR(task);
    3855             :         }
    3856             : 
    3857             :         return 0;
    3858             : }
    3859             : 
    3860           0 : int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info)
    3861             : {
                               /*
                                * Start a "btrfs-uuid" kernel thread running
                                * btrfs_uuid_rescan_kthread() on @fs_info.
                                *
                                * Returns 0 once the thread has been started, or the error
                                * encoded in the kthread_run() result.
                                */
    3862             :         struct task_struct *task;
    3863             : 
                               /* taken here; released by the rescan thread on each of its exit paths */
    3864           0 :         down(&fs_info->uuid_tree_rescan_sem);
    3865           0 :         task = kthread_run(btrfs_uuid_rescan_kthread, fs_info, "btrfs-uuid");
    3866           0 :         if (IS_ERR(task)) {
    3867             :                 /* fs_info->update_uuid_tree_gen remains 0 in all error case */
    3868           0 :                 btrfs_warn(fs_info, "failed to start uuid_rescan task");
                                       /* no thread will run, so release the semaphore ourselves */
    3869           0 :                 up(&fs_info->uuid_tree_rescan_sem);
    3870           0 :                 return PTR_ERR(task);
    3871             :         }
    3872             : 
    3873             :         return 0;
    3874             : }
    3875             : 
    3876             : /*
    3877             :  * shrinking a device means finding all of the device extents past
    3878             :  * the new size, and then following the back refs to the chunks.
    3879             :  * The chunk relocation code actually frees the device extent
                        *
                        * The in-memory size (and, for a writeable device, total_rw_bytes and
                        * free_chunk_space) is reduced first, under the chunk lock.  The device
                        * extents are then walked from the highest offset downwards and every
                        * chunk whose extent still reaches past new_size is relocated.  -ENOSPC
                        * from a relocation is counted and the whole walk is retried once; if
                        * relocations still fail with -ENOSPC on the second pass, the in-memory
                        * accounting is rolled back and -ENOSPC is returned.  On success the device
                        * item and the super block total are updated inside a transaction.
                        *
                        * Returns 0 on success, -EINVAL if the device is a dev-replace target,
                        * -ENOMEM if no path could be allocated, or the error of the failing step.
                        *
                        * NOTE(review): only the -ENOSPC-after-retry exit rolls the accounting
                        * back; the other "goto done" error exits leave device->total_bytes at
                        * new_size.  Confirm whether that is intended.
    3880             :  */
    3881           0 : int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
    3882             : {
    3883             :         struct btrfs_trans_handle *trans;
    3884           0 :         struct btrfs_root *root = device->dev_root;
    3885             :         struct btrfs_dev_extent *dev_extent = NULL;
    3886             :         struct btrfs_path *path;
    3887             :         u64 length;
    3888             :         u64 chunk_tree;
    3889             :         u64 chunk_objectid;
    3890             :         u64 chunk_offset;
    3891             :         int ret;
    3892             :         int slot;
    3893             :         int failed = 0;
    3894             :         bool retried = false;
    3895             :         struct extent_buffer *l;
    3896             :         struct btrfs_key key;
    3897           0 :         struct btrfs_super_block *super_copy = root->fs_info->super_copy;
    3898             :         u64 old_total = btrfs_super_total_bytes(super_copy);
    3899           0 :         u64 old_size = device->total_bytes;
    3900           0 :         u64 diff = device->total_bytes - new_size;
    3901             : 
    3902           0 :         if (device->is_tgtdev_for_dev_replace)
    3903             :                 return -EINVAL;
    3904             : 
    3905           0 :         path = btrfs_alloc_path();
    3906           0 :         if (!path)
    3907             :                 return -ENOMEM;
    3908             : 
    3909           0 :         path->reada = 2;
    3910             : 
    3911             :         lock_chunks(root);
    3912             : 
                               /* Publish the smaller size before relocating anything. */
    3913           0 :         device->total_bytes = new_size;
    3914           0 :         if (device->writeable) {
    3915           0 :                 device->fs_devices->total_rw_bytes -= diff;
    3916           0 :                 spin_lock(&root->fs_info->free_chunk_lock);
    3917           0 :                 root->fs_info->free_chunk_space -= diff;
    3918           0 :                 spin_unlock(&root->fs_info->free_chunk_lock);
    3919             :         }
    3920             :         unlock_chunks(root);
    3921             : 
    3922             : again:
                               /* Start from the last dev extent of this device. */
    3923           0 :         key.objectid = device->devid;
    3924           0 :         key.offset = (u64)-1;
    3925           0 :         key.type = BTRFS_DEV_EXTENT_KEY;
    3926             : 
    3927             :         do {
    3928           0 :                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
    3929           0 :                 if (ret < 0)
    3930             :                         goto done;
    3931             : 
    3932           0 :                 ret = btrfs_previous_item(root, path, 0, key.type);
    3933           0 :                 if (ret < 0)
    3934             :                         goto done;
    3935           0 :                 if (ret) {
                                               /* No dev extent item left to look at. */
    3936             :                         ret = 0;
    3937           0 :                         btrfs_release_path(path);
    3938           0 :                         break;
    3939             :                 }
    3940             : 
    3941           0 :                 l = path->nodes[0];
    3942           0 :                 slot = path->slots[0];
    3943           0 :                 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
    3944             : 
    3945           0 :                 if (key.objectid != device->devid) {
    3946           0 :                         btrfs_release_path(path);
    3947           0 :                         break;
    3948             :                 }
    3949             : 
    3950           0 :                 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
    3951             :                 length = btrfs_dev_extent_length(l, dev_extent);
    3952             : 
                                       /* This extent already ends inside the new size: done. */
    3953           0 :                 if (key.offset + length <= new_size) {
    3954           0 :                         btrfs_release_path(path);
    3955           0 :                         break;
    3956             :                 }
    3957             : 
    3958             :                 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
    3959             :                 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
    3960             :                 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
    3961           0 :                 btrfs_release_path(path);
    3962             : 
    3963           0 :                 ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid,
    3964             :                                            chunk_offset);
    3965           0 :                 if (ret && ret != -ENOSPC)
    3966             :                         goto done;
    3967           0 :                 if (ret == -ENOSPC)
    3968           0 :                         failed++;
                                       /* Continue the search just below the extent handled above. */
    3969           0 :         } while (key.offset-- > 0);
    3970             : 
    3971           0 :         if (failed && !retried) {
                                       /* Give the -ENOSPC relocations one more pass. */
    3972             :                 failed = 0;
    3973             :                 retried = true;
    3974             :                 goto again;
    3975           0 :         } else if (failed && retried) {
    3976             :                 ret = -ENOSPC;
    3977             :                 lock_chunks(root);
    3978             : 
                                       /*
                                        * Undo exactly what was done before the walk: total_rw_bytes
                                        * and free_chunk_space were only reduced for a writeable
                                        * device, so only give them back in that case.
                                        */
    3979           0 :                 device->total_bytes = old_size;
    3980           0 :                 if (device->writeable) {
    3981           0 :                         device->fs_devices->total_rw_bytes += diff;
    3982           0 :                         spin_lock(&root->fs_info->free_chunk_lock);
    3983           0 :                         root->fs_info->free_chunk_space += diff;
    3984           0 :                         spin_unlock(&root->fs_info->free_chunk_lock);
                                       }
    3985             :                 unlock_chunks(root);
    3986             :                 goto done;
    3987             :         }
    3988             : 
    3989             :         /* Shrinking succeeded, else we would be at "done". */
    3990           0 :         trans = btrfs_start_transaction(root, 0);
    3991           0 :         if (IS_ERR(trans)) {
    3992           0 :                 ret = PTR_ERR(trans);
    3993           0 :                 goto done;
    3994             :         }
    3995             : 
    3996             :         lock_chunks(root);
    3997             : 
    3998           0 :         device->disk_total_bytes = new_size;
    3999             :         /* Now btrfs_update_device() will change the on-disk size. */
    4000           0 :         ret = btrfs_update_device(trans, device);
    4001           0 :         if (ret) {
    4002             :                 unlock_chunks(root);
    4003           0 :                 btrfs_end_transaction(trans, root);
    4004           0 :                 goto done;
    4005             :         }
    4006           0 :         WARN_ON(diff > old_total);
    4007           0 :         btrfs_set_super_total_bytes(super_copy, old_total - diff);
    4008             :         unlock_chunks(root);
    4009           0 :         btrfs_end_transaction(trans, root);
    4010             : done:
    4011           0 :         btrfs_free_path(path);
    4012           0 :         return ret;
    4013             : }
    4014             : 
                       /*
                        * Append one chunk item to the system chunk array stored in the in-memory
                        * super block copy (root->fs_info->super_copy).
                        *
                        * The new entry is the on-disk form of @key immediately followed by
                        * @item_size bytes copied from @chunk.  The recorded array size grows by
                        * sizeof(struct btrfs_disk_key) + @item_size.
                        *
                        * Returns 0 on success, or -EFBIG if the entry would not fit within
                        * BTRFS_SYSTEM_CHUNK_ARRAY_SIZE.
                        *
                        * NOTE(review): nothing in this function serialises concurrent updates of
                        * the array; presumably callers provide the locking -- confirm.
                        */
    4015          22 : static int btrfs_add_system_chunk(struct btrfs_root *root,
    4016             :                            struct btrfs_key *key,
    4017             :                            struct btrfs_chunk *chunk, int item_size)
    4018             : {
    4019          22 :         struct btrfs_super_block *super_copy = root->fs_info->super_copy;
    4020             :         struct btrfs_disk_key disk_key;
    4021             :         u32 array_size;
    4022             :         u8 *ptr;
    4023             : 
    4024             :         array_size = btrfs_super_sys_array_size(super_copy);
    4025          22 :         if (array_size + item_size + sizeof(disk_key)
    4026             :                         > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE)
    4027             :                 return -EFBIG;
    4028             : 
                               /* Write the key, then the chunk, at the current end of the array. */
    4029          22 :         ptr = super_copy->sys_chunk_array + array_size;
    4030             :         btrfs_cpu_key_to_disk(&disk_key, key);
    4031          22 :         memcpy(ptr, &disk_key, sizeof(disk_key));
    4032          22 :         ptr += sizeof(disk_key);
    4033          22 :         memcpy(ptr, chunk, item_size);
                               /* From here on item_size is the full entry size: key plus chunk. */
    4034          22 :         item_size += sizeof(disk_key);
    4035          22 :         btrfs_set_super_sys_array_size(super_copy, array_size + item_size);
    4036             :         return 0;
    4037             : }
    4038             : 
    4039             : /*
    4040             :  * sort the devices in descending order by max_avail, total_avail
                        *
                        * Comparison callback handed to sort() by __btrfs_alloc_chunk().  @a and
                        * @b point to struct btrfs_device_info entries.  Returns -1 when @a has the
                        * larger max_avail (or, on a tie, the larger total_avail), 1 when @b does,
                        * and 0 when both fields are equal.
    4041             :  */
    4042           2 : static int btrfs_cmp_device_info(const void *a, const void *b)
    4043             : {
    4044             :         const struct btrfs_device_info *di_a = a;
    4045             :         const struct btrfs_device_info *di_b = b;
    4046             : 
    4047           2 :         if (di_a->max_avail > di_b->max_avail)
    4048             :                 return -1;
    4049           2 :         if (di_a->max_avail < di_b->max_avail)
    4050             :                 return 1;
                               /* max_avail is equal: fall back to total_avail. */
    4051           2 :         if (di_a->total_avail > di_b->total_avail)
    4052             :                 return -1;
    4053           2 :         if (di_a->total_avail < di_b->total_avail)
    4054             :                 return 1;
    4055           0 :         return 0;
    4056             : }
    4057             : 
                       /*
                        * Per-profile chunk allocation parameters, indexed by BTRFS_RAID_*.
                        * __btrfs_alloc_chunk() copies these fields into its locals:
                        *
                        *   sub_stripes    - sub_stripes value stored in the map
                        *   dev_stripes    - stripes placed on each device
                        *   devs_max       - most devices to use (0 == as many as possible)
                        *   devs_min       - fewest devices needed
                        *   devs_increment - the device count must be a multiple of this
                        *   ncopies        - number of copies of the data
                        */
    4058             : static struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
    4059             :         [BTRFS_RAID_RAID10] = {
    4060             :                 .sub_stripes    = 2,
    4061             :                 .dev_stripes    = 1,
    4062             :                 .devs_max       = 0,    /* 0 == as many as possible */
    4063             :                 .devs_min       = 4,
    4064             :                 .devs_increment = 2,
    4065             :                 .ncopies        = 2,
    4066             :         },
    4067             :         [BTRFS_RAID_RAID1] = {
    4068             :                 .sub_stripes    = 1,
    4069             :                 .dev_stripes    = 1,
    4070             :                 .devs_max       = 2,
    4071             :                 .devs_min       = 2,
    4072             :                 .devs_increment = 2,
    4073             :                 .ncopies        = 2,
    4074             :         },
    4075             :         [BTRFS_RAID_DUP] = {
    4076             :                 .sub_stripes    = 1,
    4077             :                 .dev_stripes    = 2,    /* both copies live on the same device */
    4078             :                 .devs_max       = 1,
    4079             :                 .devs_min       = 1,
    4080             :                 .devs_increment = 1,
    4081             :                 .ncopies        = 2,
    4082             :         },
    4083             :         [BTRFS_RAID_RAID0] = {
    4084             :                 .sub_stripes    = 1,
    4085             :                 .dev_stripes    = 1,
    4086             :                 .devs_max       = 0,
    4087             :                 .devs_min       = 2,
    4088             :                 .devs_increment = 1,
    4089             :                 .ncopies        = 1,
    4090             :         },
    4091             :         [BTRFS_RAID_SINGLE] = {
    4092             :                 .sub_stripes    = 1,
    4093             :                 .dev_stripes    = 1,
    4094             :                 .devs_max       = 1,
    4095             :                 .devs_min       = 1,
    4096             :                 .devs_increment = 1,
    4097             :                 .ncopies        = 1,
    4098             :         },
                               /*
                                * For RAID5/RAID6 __btrfs_alloc_chunk() overrides the data stripe
                                * count derived from ncopies with num_stripes - 1 / num_stripes - 2.
                                */
    4099             :         [BTRFS_RAID_RAID5] = {
    4100             :                 .sub_stripes    = 1,
    4101             :                 .dev_stripes    = 1,
    4102             :                 .devs_max       = 0,
    4103             :                 .devs_min       = 2,
    4104             :                 .devs_increment = 1,
    4105             :                 .ncopies        = 2,
    4106             :         },
    4107             :         [BTRFS_RAID_RAID6] = {
    4108             :                 .sub_stripes    = 1,
    4109             :                 .dev_stripes    = 1,
    4110             :                 .devs_max       = 0,
    4111             :                 .devs_min       = 3,
    4112             :                 .devs_increment = 1,
    4113             :                 .ncopies        = 3,
    4114             :         },
    4115             : };
    4116             : 
                       /*
                        * Stripe length to use for a RAID5/RAID6 chunk.
                        *
                        * Currently always 64KiB; @data_devices and @dev_stripe_target are accepted
                        * but not used yet (see the TODO below).
                        */
    4117             : static u32 find_raid56_stripe_len(u32 data_devices, u32 dev_stripe_target)
    4118             : {
    4119             :         /* TODO allow them to set a preferred stripe size */
    4120             :         return 64 * 1024;
    4121             : }
    4122             : 
                       /*
                        * Set the RAID56 incompat flag on @info when @type has the RAID5 or RAID6
                        * block group bit set; do nothing for every other profile.
                        */
    4123             : static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type)
    4124             : {
    4125          87 :         if (!(type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6)))
    4126             :                 return;
    4127             : 
    4128           0 :         btrfs_set_fs_incompat(info, RAID56);
    4129             : }
    4130             : 
                       /*
                        * Device limit used by __btrfs_alloc_chunk() for data and metadata chunks
                        * whose profile has devs_max == 0: the number of struct btrfs_stripe that
                        * fit in the leaf data area of root @r after one struct btrfs_item and one
                        * struct btrfs_chunk, plus one.
                        *
                        * NOTE(review): the "+ 1" presumably accounts for a stripe that is already
                        * part of struct btrfs_chunk -- confirm against the structure definition.
                        */
    4131             : #define BTRFS_MAX_DEVS(r) ((BTRFS_LEAF_DATA_SIZE(r)             \
    4132             :                         - sizeof(struct btrfs_item)             \
    4133             :                         - sizeof(struct btrfs_chunk))           \
    4134             :                         / sizeof(struct btrfs_stripe) + 1)
    4135             : 
                       /*
                        * Device limit used by __btrfs_alloc_chunk() for system chunks whose
                        * profile has devs_max == 0: the number of struct btrfs_stripe that fit in
                        * BTRFS_SYSTEM_CHUNK_ARRAY_SIZE after two struct btrfs_disk_key and two
                        * struct btrfs_chunk, plus one.
                        *
                        * NOTE(review): the factor of two presumably reserves room for two system
                        * chunk entries in the array -- confirm the intent.
                        */
    4136             : #define BTRFS_MAX_DEVS_SYS_CHUNK ((BTRFS_SYSTEM_CHUNK_ARRAY_SIZE        \
    4137             :                                 - 2 * sizeof(struct btrfs_disk_key)     \
    4138             :                                 - 2 * sizeof(struct btrfs_chunk))       \
    4139             :                                 / sizeof(struct btrfs_stripe) + 1)
    4140             : 
                       /*
                        * Allocate a new chunk of the given @type at logical address @start.
                        *
                        *  1. Look up the profile parameters in btrfs_raid_array and pick the
                        *     stripe and chunk size limits for a data, metadata or system chunk.
                        *  2. Walk fs_devices->alloc_list and record, for every usable device, the
                        *     free device extent reported by find_free_dev_extent().
                        *  3. Sort the candidates (largest hole first), round the device count
                        *     down to the profile's increment and clamp it to devs_max.
                        *  4. Derive stripe_size and fill in the map_lookup.
                        *  5. Insert an extent_map for the chunk into the mapping tree, queue it
                        *     on the transaction's pending_chunks list and create the block group.
                        *
                        * Returns 0 on success, -ENOSPC when the alloc list is empty or too few
                        * devices have usable free space, -ENOMEM on allocation failure, or the
                        * error returned by a failing helper.
                        */
    4141          87 : static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
    4142             :                                struct btrfs_root *extent_root, u64 start,
    4143             :                                u64 type)
    4144             : {
    4145          87 :         struct btrfs_fs_info *info = extent_root->fs_info;
    4146          87 :         struct btrfs_fs_devices *fs_devices = info->fs_devices;
    4147             :         struct list_head *cur;
    4148             :         struct map_lookup *map = NULL;
    4149             :         struct extent_map_tree *em_tree;
    4150             :         struct extent_map *em;
    4151             :         struct btrfs_device_info *devices_info = NULL;
    4152             :         u64 total_avail;
    4153             :         int num_stripes;        /* total number of stripes to allocate */
    4154             :         int data_stripes;       /* number of stripes that count for
    4155             :                                    block group size */
    4156             :         int sub_stripes;        /* sub_stripes info for map */
    4157             :         int dev_stripes;        /* stripes per dev */
    4158             :         int devs_max;           /* max devs to use */
    4159             :         int devs_min;           /* min devs needed */
    4160             :         int devs_increment;     /* ndevs has to be a multiple of this */
    4161             :         int ncopies;            /* how many copies to data has */
    4162             :         int ret;
    4163             :         u64 max_stripe_size;
    4164             :         u64 max_chunk_size;
    4165             :         u64 stripe_size;
    4166             :         u64 num_bytes;
    4167             :         u64 raid_stripe_len = BTRFS_STRIPE_LEN;
    4168             :         int ndevs;
    4169             :         int i;
    4170             :         int j;
    4171             :         int index;
    4172             : 
    4173          87 :         BUG_ON(!alloc_profile_is_valid(type, 0));
    4174             : 
    4175         174 :         if (list_empty(&fs_devices->alloc_list))
    4176             :                 return -ENOSPC;
    4177             : 
    4178          87 :         index = __get_raid_index(type);
    4179             : 
    4180          87 :         sub_stripes = btrfs_raid_array[index].sub_stripes;
    4181          87 :         dev_stripes = btrfs_raid_array[index].dev_stripes;
    4182          87 :         devs_max = btrfs_raid_array[index].devs_max;
    4183          87 :         devs_min = btrfs_raid_array[index].devs_min;
    4184          87 :         devs_increment = btrfs_raid_array[index].devs_increment;
    4185          87 :         ncopies = btrfs_raid_array[index].ncopies;
    4186             : 
    4187          87 :         if (type & BTRFS_BLOCK_GROUP_DATA) {
    4188             :                 max_stripe_size = 1024 * 1024 * 1024;
    4189             :                 max_chunk_size = 10 * max_stripe_size;
    4190          42 :                 if (!devs_max)
    4191           0 :                         devs_max = BTRFS_MAX_DEVS(info->chunk_root);
    4192          45 :         } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
    4193             :                 /* for larger filesystems, use larger metadata chunks */
    4194          23 :                 if (fs_devices->total_rw_bytes > 50ULL * 1024 * 1024 * 1024)
    4195             :                         max_stripe_size = 1024 * 1024 * 1024;
    4196             :                 else
    4197             :                         max_stripe_size = 256 * 1024 * 1024;
    4198             :                 max_chunk_size = max_stripe_size;
    4199          23 :                 if (!devs_max)
    4200           0 :                         devs_max = BTRFS_MAX_DEVS(info->chunk_root);
    4201          22 :         } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
    4202             :                 max_stripe_size = 32 * 1024 * 1024;
    4203             :                 max_chunk_size = 2 * max_stripe_size;
    4204          22 :                 if (!devs_max)
    4205             :                         devs_max = BTRFS_MAX_DEVS_SYS_CHUNK;
    4206             :         } else {
    4207           0 :                 btrfs_err(info, "invalid chunk type 0x%llx requested",
    4208             :                        type);
    4209           0 :                 BUG_ON(1);
    4210             :         }
    4211             : 
    4212             :         /* we don't want a chunk larger than 10% of writeable space */
    4213         174 :         max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1),
    4214             :                              max_chunk_size);
    4215             : 
                               /*
                                * One slot per rw device.  kcalloc() zeroes the array like the
                                * former kzalloc() did and also checks the count * size
                                * multiplication for overflow.
                                */
    4216          87 :         devices_info = kcalloc(fs_devices->rw_devices, sizeof(*devices_info),
    4217             :                                GFP_NOFS);
    4218          87 :         if (!devices_info)
    4219             :                 return -ENOMEM;
    4220             : 
    4221          87 :         cur = fs_devices->alloc_list.next;
    4222             : 
    4223             :         /*
    4224             :          * in the first pass through the devices list, we gather information
    4225             :          * about the available holes on each device.
    4226             :          */
    4227             :         ndevs = 0;
    4228         263 :         while (cur != &fs_devices->alloc_list) {
    4229             :                 struct btrfs_device *device;
    4230             :                 u64 max_avail;
    4231             :                 u64 dev_offset;
    4232             : 
    4233          89 :                 device = list_entry(cur, struct btrfs_device, dev_alloc_list);
    4234             : 
    4235          89 :                 cur = cur->next;
    4236             : 
    4237          89 :                 if (!device->writeable) {
    4238           0 :                         WARN(1, KERN_ERR
    4239             :                                "BTRFS: read-only device in alloc_list\n");
    4240           0 :                         continue;
    4241             :                 }
    4242             : 
    4243         178 :                 if (!device->in_fs_metadata ||
    4244          89 :                     device->is_tgtdev_for_dev_replace)
    4245           0 :                         continue;
    4246             : 
    4247          89 :                 if (device->total_bytes > device->bytes_used)
    4248          89 :                         total_avail = device->total_bytes - device->bytes_used;
    4249             :                 else
    4250             :                         total_avail = 0;
    4251             : 
    4252             :                 /* If there is no space on this device, skip it. */
    4253          89 :                 if (total_avail == 0)
    4254           0 :                         continue;
    4255             : 
    4256          89 :                 ret = find_free_dev_extent(trans, device,
    4257             :                                            max_stripe_size * dev_stripes,
    4258             :                                            &dev_offset, &max_avail);
    4259          89 :                 if (ret && ret != -ENOSPC)
    4260             :                         goto error;
    4261             : 
                                       /* A hole of the full requested size was found: cap at it. */
    4262          89 :                 if (ret == 0)
    4263          86 :                         max_avail = max_stripe_size * dev_stripes;
    4264             : 
    4265          89 :                 if (max_avail < BTRFS_STRIPE_LEN * dev_stripes)
    4266           0 :                         continue;
    4267             : 
                                       /* devices_info only has rw_devices slots. */
    4268          89 :                 if (ndevs == fs_devices->rw_devices) {
    4269           0 :                         WARN(1, "%s: found more than %llu devices\n",
    4270             :                              __func__, fs_devices->rw_devices);
    4271           0 :                         break;
    4272             :                 }
    4273          89 :                 devices_info[ndevs].dev_offset = dev_offset;
    4274          89 :                 devices_info[ndevs].max_avail = max_avail;
    4275          89 :                 devices_info[ndevs].total_avail = total_avail;
    4276          89 :                 devices_info[ndevs].dev = device;
    4277          89 :                 ++ndevs;
    4278             :         }
    4279             : 
    4280             :         /*
    4281             :          * now sort the devices by hole size / available space
    4282             :          */
    4283          87 :         sort(devices_info, ndevs, sizeof(struct btrfs_device_info),
    4284             :              btrfs_cmp_device_info, NULL);
    4285             : 
    4286             :         /* round down to number of usable stripes */
    4287          87 :         ndevs -= ndevs % devs_increment;
    4288             : 
    4289          87 :         if (ndevs < devs_increment * sub_stripes || ndevs < devs_min) {
    4290             :                 ret = -ENOSPC;
    4291             :                 goto error;
    4292             :         }
    4293             : 
    4294          87 :         if (devs_max && ndevs > devs_max)
    4295             :                 ndevs = devs_max;
    4296             :         /*
    4297             :          * the primary goal is to maximize the number of stripes, so use as many
    4298             :          * devices as possible, even if the stripes are not maximum sized.
    4299             :          */
    4300          87 :         stripe_size = devices_info[ndevs-1].max_avail;
    4301          87 :         num_stripes = ndevs * dev_stripes;
    4302             : 
    4303             :         /*
    4304             :          * this will have to be fixed for RAID1 and RAID10 over
    4305             :          * more drives
    4306             :          */
    4307          87 :         data_stripes = num_stripes / ncopies;
    4308             : 
    4309          87 :         if (type & BTRFS_BLOCK_GROUP_RAID5) {
    4310             :                 raid_stripe_len = find_raid56_stripe_len(ndevs - 1,
    4311             :                                  btrfs_super_stripesize(info->super_copy));
    4312           0 :                 data_stripes = num_stripes - 1;
    4313             :         }
    4314          87 :         if (type & BTRFS_BLOCK_GROUP_RAID6) {
    4315             :                 raid_stripe_len = find_raid56_stripe_len(ndevs - 2,
    4316             :                                  btrfs_super_stripesize(info->super_copy));
    4317           0 :                 data_stripes = num_stripes - 2;
    4318             :         }
    4319             : 
    4320             :         /*
    4321             :          * Use the number of data stripes to figure out how big this chunk
    4322             :          * is really going to be in terms of logical address space,
    4323             :          * and compare that answer with the max chunk size
    4324             :          */
    4325          87 :         if (stripe_size * data_stripes > max_chunk_size) {
    4326             :                 u64 mask = (1ULL << 24) - 1;
    4327             :                 stripe_size = max_chunk_size;
    4328          26 :                 do_div(stripe_size, data_stripes);
    4329             : 
    4330             :                 /* bump the answer up to a 16MB boundary */
    4331          26 :                 stripe_size = (stripe_size + mask) & ~mask;
    4332             : 
    4333             :                 /* but don't go higher than the limits we found
    4334             :                  * while searching for free extents
    4335             :                  */
    4336          26 :                 if (stripe_size > devices_info[ndevs-1].max_avail)
    4337             :                         stripe_size = devices_info[ndevs-1].max_avail;
    4338             :         }
    4339             : 
    4340          87 :         do_div(stripe_size, dev_stripes);
    4341             : 
    4342             :         /* align to BTRFS_STRIPE_LEN */
    4343          87 :         do_div(stripe_size, raid_stripe_len);
    4344          87 :         stripe_size *= raid_stripe_len;
    4345             : 
    4346          87 :         map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
    4347          87 :         if (!map) {
    4348             :                 ret = -ENOMEM;
    4349             :                 goto error;
    4350             :         }
    4351          87 :         map->num_stripes = num_stripes;
    4352             : 
                               /* dev_stripes consecutive stripes on each of the chosen devices. */
    4353         176 :         for (i = 0; i < ndevs; ++i) {
    4354         133 :                 for (j = 0; j < dev_stripes; ++j) {
    4355         133 :                         int s = i * dev_stripes + j;
    4356         133 :                         map->stripes[s].dev = devices_info[i].dev;
    4357         266 :                         map->stripes[s].physical = devices_info[i].dev_offset +
    4358         133 :                                                    j * stripe_size;
    4359             :                 }
    4360             :         }
    4361          87 :         map->sector_size = extent_root->sectorsize;
    4362          87 :         map->stripe_len = raid_stripe_len;
    4363          87 :         map->io_align = raid_stripe_len;
    4364          87 :         map->io_width = raid_stripe_len;
    4365          87 :         map->type = type;
    4366          87 :         map->sub_stripes = sub_stripes;
    4367             : 
    4368          87 :         num_bytes = stripe_size * data_stripes;
    4369             : 
    4370          87 :         trace_btrfs_chunk_alloc(info->chunk_root, map, start, num_bytes);
    4371             : 
    4372          87 :         em = alloc_extent_map();
    4373          87 :         if (!em) {
    4374           0 :                 kfree(map);
    4375             :                 ret = -ENOMEM;
    4376           0 :                 goto error;
    4377             :         }
                               /* The extent map carries the map_lookup in its bdev field. */
    4378             :         set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags);
    4379          87 :         em->bdev = (struct block_device *)map;
    4380          87 :         em->start = start;
    4381          87 :         em->len = num_bytes;
    4382          87 :         em->block_start = 0;
    4383          87 :         em->block_len = em->len;
    4384          87 :         em->orig_block_len = stripe_size;
    4385             : 
    4386          87 :         em_tree = &extent_root->fs_info->mapping_tree.map_tree;
    4387          87 :         write_lock(&em_tree->lock);
    4388          87 :         ret = add_extent_mapping(em_tree, em, 0);
    4389          87 :         if (!ret) {
                                       /* Extra reference for the pending_chunks list entry. */
    4390          87 :                 list_add_tail(&em->list, &trans->transaction->pending_chunks);
    4391          87 :                 atomic_inc(&em->refs);
    4392             :         }
    4393             :         write_unlock(&em_tree->lock);
    4394          87 :         if (ret) {
    4395           0 :                 free_extent_map(em);
    4396           0 :                 goto error;
    4397             :         }
    4398             : 
    4399          87 :         ret = btrfs_make_block_group(trans, extent_root, 0, type,
    4400             :                                      BTRFS_FIRST_CHUNK_TREE_OBJECTID,
    4401             :                                      start, num_bytes);
    4402          87 :         if (ret)
    4403             :                 goto error_del_extent;
    4404             : 
    4405          87 :         free_extent_map(em);
    4406          87 :         check_raid56_incompat_flag(extent_root->fs_info, type);
    4407             : 
    4408          87 :         kfree(devices_info);
    4409          87 :         return 0;
    4410             : 
    4411             : error_del_extent:
    4412           0 :         write_lock(&em_tree->lock);
    4413           0 :         remove_extent_mapping(em_tree, em);
    4414             :         write_unlock(&em_tree->lock);
    4415             : 
    4416             :         /* One for our allocation */
    4417           0 :         free_extent_map(em);
    4418             :         /* One for the tree reference */
    4419           0 :         free_extent_map(em);
    4420             : error:
    4421           0 :         kfree(devices_info);
    4422           0 :         return ret;
    4423             : }
    4424             : 
    4425          87 : int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
    4426             :                                 struct btrfs_root *extent_root,
    4427             :                                 u64 chunk_offset, u64 chunk_size)
    4428             : {
    4429             :         struct btrfs_key key;
    4430         109 :         struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root;
    4431             :         struct btrfs_device *device;
    4432             :         struct btrfs_chunk *chunk;
    4433             :         struct btrfs_stripe *stripe;
    4434             :         struct extent_map_tree *em_tree;
    4435             :         struct extent_map *em;
    4436             :         struct map_lookup *map;
    4437             :         size_t item_size;
    4438             :         u64 dev_offset;
    4439             :         u64 stripe_size;
    4440             :         int i = 0;
    4441             :         int ret;
    4442             : 
    4443          87 :         em_tree = &extent_root->fs_info->mapping_tree.map_tree;
    4444          87 :         read_lock(&em_tree->lock);
    4445          87 :         em = lookup_extent_mapping(em_tree, chunk_offset, chunk_size);
    4446             :         read_unlock(&em_tree->lock);
    4447             : 
    4448          87 :         if (!em) {
    4449           0 :                 btrfs_crit(extent_root->fs_info, "unable to find logical "
    4450             :                            "%Lu len %Lu", chunk_offset, chunk_size);
    4451           0 :                 return -EINVAL;
    4452             :         }
    4453             : 
    4454          87 :         if (em->start != chunk_offset || em->len != chunk_size) {
    4455           0 :                 btrfs_crit(extent_root->fs_info, "found a bad mapping, wanted"
    4456             :                           " %Lu-%Lu, found %Lu-%Lu", chunk_offset,
    4457             :                           chunk_size, em->start, em->len);
    4458           0 :                 free_extent_map(em);
    4459           0 :                 return -EINVAL;
    4460             :         }
    4461             : 
    4462          87 :         map = (struct map_lookup *)em->bdev;
    4463          87 :         item_size = btrfs_chunk_item_size(map->num_stripes);
    4464          87 :         stripe_size = em->orig_block_len;
    4465             : 
    4466          87 :         chunk = kzalloc(item_size, GFP_NOFS);
    4467          87 :         if (!chunk) {
    4468             :                 ret = -ENOMEM;
    4469             :                 goto out;
    4470             :         }
    4471             : 
    4472         133 :         for (i = 0; i < map->num_stripes; i++) {
    4473         133 :                 device = map->stripes[i].dev;
    4474         133 :                 dev_offset = map->stripes[i].physical;
    4475             : 
    4476         133 :                 device->bytes_used += stripe_size;
    4477         133 :                 ret = btrfs_update_device(trans, device);
    4478         133 :                 if (ret)
    4479             :                         goto out;
    4480         133 :                 ret = btrfs_alloc_dev_extent(trans, device,
    4481             :                                              chunk_root->root_key.objectid,
    4482             :                                              BTRFS_FIRST_CHUNK_TREE_OBJECTID,
    4483             :                                              chunk_offset, dev_offset,
    4484             :                                              stripe_size);
    4485         133 :                 if (ret)
    4486             :                         goto out;
    4487             :         }
    4488             : 
    4489          87 :         spin_lock(&extent_root->fs_info->free_chunk_lock);
    4490         174 :         extent_root->fs_info->free_chunk_space -= (stripe_size *
    4491          87 :                                                    map->num_stripes);
    4492          87 :         spin_unlock(&extent_root->fs_info->free_chunk_lock);
    4493             : 
    4494          87 :         stripe = &chunk->stripe;
    4495         220 :         for (i = 0; i < map->num_stripes; i++) {
    4496         133 :                 device = map->stripes[i].dev;
    4497         133 :                 dev_offset = map->stripes[i].physical;
    4498             : 
    4499         133 :                 btrfs_set_stack_stripe_devid(stripe, device->devid);
    4500             :                 btrfs_set_stack_stripe_offset(stripe, dev_offset);
    4501         133 :                 memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE);
    4502         133 :                 stripe++;
    4503             :         }
    4504             : 
    4505             :         btrfs_set_stack_chunk_length(chunk, chunk_size);
    4506          87 :         btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid);
    4507          87 :         btrfs_set_stack_chunk_stripe_len(chunk, map->stripe_len);
    4508          87 :         btrfs_set_stack_chunk_type(chunk, map->type);
    4509          87 :         btrfs_set_stack_chunk_num_stripes(chunk, map->num_stripes);
    4510          87 :         btrfs_set_stack_chunk_io_align(chunk, map->stripe_len);
    4511          87 :         btrfs_set_stack_chunk_io_width(chunk, map->stripe_len);
    4512          87 :         btrfs_set_stack_chunk_sector_size(chunk, extent_root->sectorsize);
    4513          87 :         btrfs_set_stack_chunk_sub_stripes(chunk, map->sub_stripes);
    4514             : 
    4515          87 :         key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
    4516          87 :         key.type = BTRFS_CHUNK_ITEM_KEY;
    4517          87 :         key.offset = chunk_offset;
    4518             : 
    4519          87 :         ret = btrfs_insert_item(trans, chunk_root, &key, chunk, item_size);
    4520          87 :         if (ret == 0 && map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
    4521             :                 /*
    4522             :                  * TODO: Cleanup of inserted chunk root in case of
    4523             :                  * failure.
    4524             :                  */
    4525          44 :                 ret = btrfs_add_system_chunk(chunk_root, &key, chunk,
    4526             :                                              item_size);
    4527             :         }
    4528             : 
    4529             : out:
    4530          87 :         kfree(chunk);
    4531          87 :         free_extent_map(em);
    4532          87 :         return ret;
    4533             : }
    4534             : 
    4535             : /*
    4536             :  * Chunk allocation is split into two parts. The first part does the
    4537             :  * work that makes the newly allocated chunk usable, but performs no
    4538             :  * operation that modifies the chunk tree. The second part does the work
    4539             :  * that requires modifying the chunk tree. This division is important for
    4540             :  * the bootstrap process of adding storage to a seed btrfs.
    4541             :  */
    4542          87 : int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
    4543             :                       struct btrfs_root *extent_root, u64 type)
    4544             : {
    4545             :         u64 chunk_offset;
    4546             : 
    4547          87 :         chunk_offset = find_next_chunk(extent_root->fs_info);
    4548          87 :         return __btrfs_alloc_chunk(trans, extent_root, chunk_offset, type);
    4549             : }
    4550             : 
    4551           0 : static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
    4552             :                                          struct btrfs_root *root,
    4553             :                                          struct btrfs_device *device)
    4554             : {
    4555             :         u64 chunk_offset;
    4556             :         u64 sys_chunk_offset;
    4557             :         u64 alloc_profile;
    4558           0 :         struct btrfs_fs_info *fs_info = root->fs_info;
    4559           0 :         struct btrfs_root *extent_root = fs_info->extent_root;
    4560             :         int ret;
    4561             : 
    4562           0 :         chunk_offset = find_next_chunk(fs_info);
    4563           0 :         alloc_profile = btrfs_get_alloc_profile(extent_root, 0);
    4564           0 :         ret = __btrfs_alloc_chunk(trans, extent_root, chunk_offset,
    4565             :                                   alloc_profile);
    4566           0 :         if (ret)
    4567             :                 return ret;
    4568             : 
    4569           0 :         sys_chunk_offset = find_next_chunk(root->fs_info);
    4570           0 :         alloc_profile = btrfs_get_alloc_profile(fs_info->chunk_root, 0);
    4571           0 :         ret = __btrfs_alloc_chunk(trans, extent_root, sys_chunk_offset,
    4572             :                                   alloc_profile);
    4573           0 :         if (ret) {
    4574           0 :                 btrfs_abort_transaction(trans, root, ret);
    4575           0 :                 goto out;
    4576             :         }
    4577             : 
    4578           0 :         ret = btrfs_add_device(trans, fs_info->chunk_root, device);
    4579           0 :         if (ret)
    4580           0 :                 btrfs_abort_transaction(trans, root, ret);
    4581             : out:
    4582           0 :         return ret;
    4583             : }
    4584             : 
    4585        1141 : int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset)
    4586             : {
    4587             :         struct extent_map *em;
    4588             :         struct map_lookup *map;
    4589        1141 :         struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
    4590             :         int readonly = 0;
    4591             :         int i;
    4592             : 
    4593        1141 :         read_lock(&map_tree->map_tree.lock);
    4594        1141 :         em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
    4595             :         read_unlock(&map_tree->map_tree.lock);
    4596        1141 :         if (!em)
    4597             :                 return 1;
    4598             : 
    4599        1141 :         if (btrfs_test_opt(root, DEGRADED)) {
    4600           0 :                 free_extent_map(em);
    4601           0 :                 return 0;
    4602             :         }
    4603             : 
    4604        1141 :         map = (struct map_lookup *)em->bdev;
    4605        2757 :         for (i = 0; i < map->num_stripes; i++) {
    4606        1616 :                 if (!map->stripes[i].dev->writeable) {
    4607             :                         readonly = 1;
    4608             :                         break;
    4609             :                 }
    4610             :         }
    4611        1141 :         free_extent_map(em);
    4612        1141 :         return readonly;
    4613             : }
    4614             : 
    4615         221 : void btrfs_mapping_init(struct btrfs_mapping_tree *tree)
    4616             : {
    4617         221 :         extent_map_tree_init(&tree->map_tree);
    4618         221 : }
    4619             : 
    4620         221 : void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree)
    4621             : {
    4622             :         struct extent_map *em;
    4623             : 
    4624             :         while (1) {
    4625        1377 :                 write_lock(&tree->map_tree.lock);
    4626        1377 :                 em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1);
    4627        1377 :                 if (em)
    4628        1156 :                         remove_extent_mapping(&tree->map_tree, em);
    4629             :                 write_unlock(&tree->map_tree.lock);
    4630        1377 :                 if (!em)
    4631             :                         break;
    4632             :                 /* once for us */
    4633        1156 :                 free_extent_map(em);
    4634             :                 /* once for the tree */
    4635        1156 :                 free_extent_map(em);
    4636        1156 :         }
    4637         221 : }
    4638             : 
    4639           0 : int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
    4640             : {
    4641             :         struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
    4642             :         struct extent_map *em;
    4643             :         struct map_lookup *map;
    4644           0 :         struct extent_map_tree *em_tree = &map_tree->map_tree;
    4645             :         int ret;
    4646             : 
    4647           0 :         read_lock(&em_tree->lock);
    4648           0 :         em = lookup_extent_mapping(em_tree, logical, len);
    4649             :         read_unlock(&em_tree->lock);
    4650             : 
    4651             :         /*
    4652             :          * We could return errors for these cases, but that could get ugly,
    4653             :          * and callers would most likely just stop and exit anyway. Return 1
    4654             :          * instead so that callers don't try to use other copies.
    4655             :          */
    4656           0 :         if (!em) {
    4657           0 :                 btrfs_crit(fs_info, "No mapping for %Lu-%Lu", logical,
    4658             :                             logical+len);
    4659           0 :                 return 1;
    4660             :         }
    4661             : 
    4662           0 :         if (em->start > logical || em->start + em->len < logical) {
    4663           0 :                 btrfs_crit(fs_info, "Invalid mapping for %Lu-%Lu, got "
    4664             :                             "%Lu-%Lu", logical, logical+len, em->start,
    4665             :                             em->start + em->len);
    4666           0 :                 free_extent_map(em);
    4667           0 :                 return 1;
    4668             :         }
    4669             : 
    4670           0 :         map = (struct map_lookup *)em->bdev;
    4671           0 :         if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1))
    4672           0 :                 ret = map->num_stripes;
    4673           0 :         else if (map->type & BTRFS_BLOCK_GROUP_RAID10)
    4674           0 :                 ret = map->sub_stripes;
    4675           0 :         else if (map->type & BTRFS_BLOCK_GROUP_RAID5)
    4676             :                 ret = 2;
    4677           0 :         else if (map->type & BTRFS_BLOCK_GROUP_RAID6)
    4678             :                 ret = 3;
    4679             :         else
    4680             :                 ret = 1;
    4681           0 :         free_extent_map(em);
    4682             : 
    4683           0 :         btrfs_dev_replace_lock(&fs_info->dev_replace);
    4684           0 :         if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))
    4685           0 :                 ret++;
    4686           0 :         btrfs_dev_replace_unlock(&fs_info->dev_replace);
    4687             : 
    4688           0 :         return ret;
    4689             : }
    4690             : 
    4691        1228 : unsigned long btrfs_full_stripe_len(struct btrfs_root *root,
    4692             :                                     struct btrfs_mapping_tree *map_tree,
    4693             :                                     u64 logical)
    4694             : {
    4695             :         struct extent_map *em;
    4696             :         struct map_lookup *map;
    4697        1228 :         struct extent_map_tree *em_tree = &map_tree->map_tree;
    4698        1228 :         unsigned long len = root->sectorsize;
    4699             : 
    4700        1228 :         read_lock(&em_tree->lock);
    4701        1228 :         em = lookup_extent_mapping(em_tree, logical, len);
    4702             :         read_unlock(&em_tree->lock);
    4703        1228 :         BUG_ON(!em);
    4704             : 
    4705        1228 :         BUG_ON(em->start > logical || em->start + em->len < logical);
    4706        1228 :         map = (struct map_lookup *)em->bdev;
    4707        1228 :         if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
    4708             :                          BTRFS_BLOCK_GROUP_RAID6)) {
    4709          12 :                 len = map->stripe_len * nr_data_stripes(map);
    4710             :         }
    4711        1228 :         free_extent_map(em);
    4712        1228 :         return len;
    4713             : }
    4714             : 
    4715           0 : int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree,
    4716             :                            u64 logical, u64 len, int mirror_num)
    4717             : {
    4718             :         struct extent_map *em;
    4719             :         struct map_lookup *map;
    4720           0 :         struct extent_map_tree *em_tree = &map_tree->map_tree;
    4721             :         int ret = 0;
    4722             : 
    4723           0 :         read_lock(&em_tree->lock);
    4724           0 :         em = lookup_extent_mapping(em_tree, logical, len);
    4725             :         read_unlock(&em_tree->lock);
    4726           0 :         BUG_ON(!em);
    4727             : 
    4728           0 :         BUG_ON(em->start > logical || em->start + em->len < logical);
    4729           0 :         map = (struct map_lookup *)em->bdev;
    4730           0 :         if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
    4731             :                          BTRFS_BLOCK_GROUP_RAID6))
    4732             :                 ret = 1;
    4733           0 :         free_extent_map(em);
    4734           0 :         return ret;
    4735             : }
    4736             : 
    4737       70116 : static int find_live_mirror(struct btrfs_fs_info *fs_info,
    4738             :                             struct map_lookup *map, int first, int num,
    4739             :                             int optimal, int dev_replace_is_ongoing)
    4740             : {
    4741             :         int i;
    4742             :         int tolerance;
    4743             :         struct btrfs_device *srcdev;
    4744             : 
    4745      140126 :         if (dev_replace_is_ongoing &&
    4746       70010 :             fs_info->dev_replace.cont_reading_from_srcdev_mode ==
    4747             :              BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_AVOID)
    4748           0 :                 srcdev = fs_info->dev_replace.srcdev;
    4749             :         else
    4750             :                 srcdev = NULL;
    4751             : 
    4752             :         /*
    4753             :          * try to avoid the drive that is the source drive for a
    4754             :          * dev-replace procedure, only choose it if no other non-missing
    4755             :          * mirror is available
    4756             :          */
    4757           0 :         for (tolerance = 0; tolerance < 2; tolerance++) {
    4758       70116 :                 if (map->stripes[optimal].dev->bdev &&
    4759       70116 :                     (tolerance || map->stripes[optimal].dev != srcdev))
    4760             :                         return optimal;
    4761           0 :                 for (i = first; i < first + num; i++) {
    4762           0 :                         if (map->stripes[i].dev->bdev &&
    4763           0 :                             (tolerance || map->stripes[i].dev != srcdev))
    4764             :                                 return i;
    4765             :                 }
    4766             :         }
    4767             : 
    4768             :         /* We couldn't find one that doesn't fail.  Just return something
    4769             :          * and the I/O error handling code will clean up eventually.
    4770             :          */
    4771             :         return optimal;
    4772             : }
    4773             : 
    4774             : static inline int parity_smaller(u64 a, u64 b)
    4775             : {
    4776             :         return a > b;
    4777             : }
    4778             : 
    4779             : /* Bubble-sort the stripe set to put the parity/syndrome stripes last */
    4780          38 : static void sort_parity_stripes(struct btrfs_bio *bbio, u64 *raid_map)
    4781             : {
    4782             :         struct btrfs_bio_stripe s;
    4783             :         int i;
    4784             :         u64 l;
    4785             :         int again = 1;
    4786             : 
    4787         165 :         while (again) {
    4788             :                 again = 0;
    4789         267 :                 for (i = 0; i < bbio->num_stripes - 1; i++) {
    4790         267 :                         if (parity_smaller(raid_map[i], raid_map[i+1])) {
    4791          89 :                                 s = bbio->stripes[i];
    4792             :                                 l = raid_map[i];
    4793          89 :                                 bbio->stripes[i] = bbio->stripes[i+1];
    4794          89 :                                 raid_map[i] = raid_map[i+1];
    4795          89 :                                 bbio->stripes[i+1] = s;
    4796          89 :                                 raid_map[i+1] = l;
    4797             :                                 again = 1;
    4798             :                         }
    4799             :                 }
    4800             :         }
    4801          38 : }
    4802             : 
    4803     1770950 : static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
    4804             :                              u64 logical, u64 *length,
    4805             :                              struct btrfs_bio **bbio_ret,
    4806             :                              int mirror_num, u64 **raid_map_ret)
    4807             : {
    4808             :         struct extent_map *em;
    4809             :         struct map_lookup *map;
    4810             :         struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
    4811     1770950 :         struct extent_map_tree *em_tree = &map_tree->map_tree;
    4812             :         u64 offset;
    4813             :         u64 stripe_offset;
    4814             :         u64 stripe_end_offset;
    4815             :         u64 stripe_nr;
    4816             :         u64 stripe_nr_orig;
    4817             :         u64 stripe_nr_end;
    4818             :         u64 stripe_len;
    4819             :         u64 *raid_map = NULL;
    4820             :         int stripe_index;
    4821             :         int i;
    4822             :         int ret = 0;
    4823             :         int num_stripes;
    4824             :         int max_errors = 0;
    4825             :         struct btrfs_bio *bbio = NULL;
    4826     1770950 :         struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
    4827             :         int dev_replace_is_ongoing = 0;
    4828             :         int num_alloc_stripes;
    4829             :         int patch_the_first_stripe_for_dev_replace = 0;
    4830             :         u64 physical_to_patch_in_first_stripe = 0;
    4831             :         u64 raid56_full_stripe_start = (u64)-1;
    4832             : 
    4833     1770950 :         read_lock(&em_tree->lock);
    4834     1771455 :         em = lookup_extent_mapping(em_tree, logical, *length);
    4835             :         read_unlock(&em_tree->lock);
    4836             : 
    4837     1772091 :         if (!em) {
    4838           0 :                 btrfs_crit(fs_info, "unable to find logical %llu len %llu",
    4839             :                         logical, *length);
    4840           0 :                 return -EINVAL;
    4841             :         }
    4842             : 
    4843     1772091 :         if (em->start > logical || em->start + em->len < logical) {
    4844           0 :                 btrfs_crit(fs_info, "found a bad mapping, wanted %Lu, "
    4845             :                            "found %Lu-%Lu", logical, em->start,
    4846             :                            em->start + em->len);
    4847           0 :                 free_extent_map(em);
    4848           0 :                 return -EINVAL;
    4849             :         }
    4850             : 
    4851     1772139 :         map = (struct map_lookup *)em->bdev;
    4852     1772139 :         offset = logical - em->start;
    4853             : 
    4854     1772139 :         stripe_len = map->stripe_len;
    4855             :         stripe_nr = offset;
    4856             :         /*
    4857             :          * stripe_nr counts the total number of stripes we have to stride
    4858             :          * to get to this block
    4859             :          */
    4860     1772139 :         do_div(stripe_nr, stripe_len);
    4861             : 
    4862     1772139 :         stripe_offset = stripe_nr * stripe_len;
    4863     1772139 :         BUG_ON(offset < stripe_offset);
    4864             : 
    4865             :         /* stripe_offset is the offset of this block in its stripe */
    4866     1772139 :         stripe_offset = offset - stripe_offset;
    4867             : 
    4868             :         /* if we're here for raid56, we need to know the stripe aligned start */
    4869     1772139 :         if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6)) {
    4870        1418 :                 unsigned long full_stripe_len = stripe_len * nr_data_stripes(map);
    4871             :                 raid56_full_stripe_start = offset;
    4872             : 
    4873             :                 /* allow a write of a full stripe, but make sure we don't
    4874             :                  * allow straddling of stripes
    4875             :                  */
    4876        1418 :                 do_div(raid56_full_stripe_start, full_stripe_len);
    4877        1418 :                 raid56_full_stripe_start *= full_stripe_len;
    4878             :         }
    4879             : 
    4880     1772139 :         if (rw & REQ_DISCARD) {
    4881             :                 /* we don't discard raid56 yet */
    4882           0 :                 if (map->type &
    4883             :                     (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6)) {
    4884             :                         ret = -EOPNOTSUPP;
    4885             :                         goto out;
    4886             :                 }
    4887           0 :                 *length = min_t(u64, em->len - offset, *length);
    4888     1772139 :         } else if (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
    4889             :                 u64 max_len;
    4890             :                 /* For writes to RAID[56], allow a full stripeset across all disks.
    4891             :                    For other RAID types and for RAID[56] reads, just allow a single
    4892             :                    stripe (on a single disk). */
    4893      972711 :                 if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6) &&
    4894        1418 :                     (rw & REQ_WRITE)) {
    4895        2678 :                         max_len = stripe_len * nr_data_stripes(map) -
    4896        1339 :                                 (offset - raid56_full_stripe_start);
    4897             :                 } else {
    4898             :                         /* we limit the length of each bio to what fits in a stripe */
    4899      969954 :                         max_len = stripe_len - stripe_offset;
    4900             :                 }
    4901      971293 :                 *length = min_t(u64, em->len - offset, max_len);
    4902             :         } else {
    4903      800846 :                 *length = em->len - offset;
    4904             :         }
    4905             : 
    4906             :         /* This is for when we're called from btrfs_merge_bio_hook() and all
    4907             :            it cares about is the length */
    4908     1772139 :         if (!bbio_ret)
    4909             :                 goto out;
    4910             : 
    4911      251815 :         btrfs_dev_replace_lock(dev_replace);
    4912      251823 :         dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
    4913      251823 :         if (!dev_replace_is_ongoing)
    4914      157988 :                 btrfs_dev_replace_unlock(dev_replace);
    4915             : 
    4916      251821 :         if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 &&
    4917           0 :             !(rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)) &&
    4918           0 :             dev_replace->tgtdev != NULL) {
    4919             :                 /*
    4920             :                  * in dev-replace case, for repair case (that's the only
    4921             :                  * case where the mirror is selected explicitly when
    4922             :                  * calling btrfs_map_block), blocks left of the left cursor
    4923             :                  * can also be read from the target drive.
    4924             :                  * For REQ_GET_READ_MIRRORS, the target drive is added as
    4925             :                  * the last one to the array of stripes. For READ, it also
    4926             :                  * needs to be supported using the same mirror number.
    4927             :                  * If the requested block is not left of the left cursor,
    4928             :                  * EIO is returned. This can happen because btrfs_num_copies()
    4929             :                  * returns one more in the dev-replace case.
    4930             :                  */
    4931           0 :                 u64 tmp_length = *length;
    4932           0 :                 struct btrfs_bio *tmp_bbio = NULL;
    4933             :                 int tmp_num_stripes;
    4934           0 :                 u64 srcdev_devid = dev_replace->srcdev->devid;
    4935             :                 int index_srcdev = 0;
    4936             :                 int found = 0;
    4937             :                 u64 physical_of_found = 0;
    4938             : 
    4939           0 :                 ret = __btrfs_map_block(fs_info, REQ_GET_READ_MIRRORS,
    4940             :                              logical, &tmp_length, &tmp_bbio, 0, NULL);
    4941           0 :                 if (ret) {
    4942           0 :                         WARN_ON(tmp_bbio != NULL);
    4943           0 :                         goto out;
    4944             :                 }
    4945             : 
    4946           0 :                 tmp_num_stripes = tmp_bbio->num_stripes;
    4947           0 :                 if (mirror_num > tmp_num_stripes) {
    4948             :                         /*
    4949             :                          * REQ_GET_READ_MIRRORS does not contain this
    4950             :                          * mirror, that means that the requested area
    4951             :                          * is not left of the left cursor
    4952             :                          */
    4953             :                         ret = -EIO;
    4954           0 :                         kfree(tmp_bbio);
    4955           0 :                         goto out;
    4956             :                 }
    4957             : 
    4958             :                 /*
    4959             :                  * process the rest of the function using the mirror_num
    4960             :                  * of the source drive. Therefore look it up first.
    4961             :                  * At the end, patch the device pointer to the one of the
    4962             :                  * target drive.
    4963             :                  */
    4964           0 :                 for (i = 0; i < tmp_num_stripes; i++) {
    4965           0 :                         if (tmp_bbio->stripes[i].dev->devid == srcdev_devid) {
    4966             :                                 /*
    4967             :                                  * In case of DUP, in order to keep it
    4968             :                                  * simple, only add the mirror with the
    4969             :                                  * lowest physical address
    4970             :                                  */
    4971           0 :                                 if (found &&
    4972             :                                     physical_of_found <=
    4973           0 :                                      tmp_bbio->stripes[i].physical)
    4974           0 :                                         continue;
    4975             :                                 index_srcdev = i;
    4976             :                                 found = 1;
    4977           0 :                                 physical_of_found =
    4978             :                                         tmp_bbio->stripes[i].physical;
    4979             :                         }
    4980             :                 }
    4981             : 
    4982           0 :                 if (found) {
    4983           0 :                         mirror_num = index_srcdev + 1;
    4984             :                         patch_the_first_stripe_for_dev_replace = 1;
    4985             :                         physical_to_patch_in_first_stripe = physical_of_found;
    4986             :                 } else {
    4987           0 :                         WARN_ON(1);
    4988             :                         ret = -EIO;
    4989           0 :                         kfree(tmp_bbio);
    4990           0 :                         goto out;
    4991             :                 }
    4992             : 
    4993           0 :                 kfree(tmp_bbio);
    4994      251821 :         } else if (mirror_num > map->num_stripes) {
    4995             :                 mirror_num = 0;
    4996             :         }
    4997             : 
    4998             :         num_stripes = 1;
    4999             :         stripe_index = 0;
    5000             :         stripe_nr_orig = stripe_nr;
    5001      251821 :         stripe_nr_end = ALIGN(offset + *length, map->stripe_len);
    5002      251821 :         do_div(stripe_nr_end, map->stripe_len);
    5003      251821 :         stripe_end_offset = stripe_nr_end * map->stripe_len -
    5004             :                             (offset + *length);
    5005             : 
    5006      251821 :         if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
    5007        3359 :                 if (rw & REQ_DISCARD)
    5008           0 :                         num_stripes = min_t(u64, map->num_stripes,
    5009             :                                             stripe_nr_end - stripe_nr_orig);
    5010        3359 :                 stripe_index = do_div(stripe_nr, map->num_stripes);
    5011      248462 :         } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
    5012      111765 :                 if (rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS))
    5013       41448 :                         num_stripes = map->num_stripes;
    5014       70317 :                 else if (mirror_num)
    5015         208 :                         stripe_index = mirror_num - 1;
    5016             :                 else {
    5017       70109 :                         stripe_index = find_live_mirror(fs_info, map, 0,
    5018             :                                             map->num_stripes,
    5019       70109 :                                             current->pid % map->num_stripes,
    5020             :                                             dev_replace_is_ongoing);
    5021       70109 :                         mirror_num = stripe_index + 1;
    5022             :                 }
    5023             : 
    5024      136697 :         } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
    5025       29813 :                 if (rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)) {
    5026       21348 :                         num_stripes = map->num_stripes;
    5027        8465 :                 } else if (mirror_num) {
    5028           0 :                         stripe_index = mirror_num - 1;
    5029             :                 } else {
    5030             :                         mirror_num = 1;
    5031             :                 }
    5032             : 
    5033      106884 :         } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
    5034          43 :                 int factor = map->num_stripes / map->sub_stripes;
    5035             : 
    5036          43 :                 stripe_index = do_div(stripe_nr, factor);
    5037          43 :                 stripe_index *= map->sub_stripes;
    5038             : 
    5039          43 :                 if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS))
    5040             :                         num_stripes = map->sub_stripes;
    5041           7 :                 else if (rw & REQ_DISCARD)
    5042           0 :                         num_stripes = min_t(u64, map->sub_stripes *
    5043             :                                             (stripe_nr_end - stripe_nr_orig),
    5044             :                                             map->num_stripes);
    5045           7 :                 else if (mirror_num)
    5046           0 :                         stripe_index += mirror_num - 1;
    5047             :                 else {
    5048             :                         int old_stripe_index = stripe_index;
    5049           7 :                         stripe_index = find_live_mirror(fs_info, map,
    5050             :                                               stripe_index,
    5051             :                                               map->sub_stripes, stripe_index +
    5052           7 :                                               current->pid % map->sub_stripes,
    5053             :                                               dev_replace_is_ongoing);
    5054           7 :                         mirror_num = stripe_index - old_stripe_index + 1;
    5055             :                 }
    5056             : 
    5057      106841 :         } else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
    5058             :                                 BTRFS_BLOCK_GROUP_RAID6)) {
    5059             :                 u64 tmp;
    5060             : 
    5061          52 :                 if (bbio_ret && ((rw & REQ_WRITE) || mirror_num > 1)
    5062          76 :                     && raid_map_ret) {
    5063             :                         int i, rot;
    5064             : 
    5065             :                         /* push stripe_nr back to the start of the full stripe */
    5066             :                         stripe_nr = raid56_full_stripe_start;
    5067          38 :                         do_div(stripe_nr, stripe_len);
    5068             : 
    5069          38 :                         stripe_index = do_div(stripe_nr, nr_data_stripes(map));
    5070             : 
    5071             :                         /* RAID[56] write or recovery. Return all stripes */
    5072             :                         num_stripes = map->num_stripes;
    5073             :                         max_errors = nr_parity_stripes(map);
    5074             : 
    5075          38 :                         raid_map = kmalloc_array(num_stripes, sizeof(u64),
    5076             :                                            GFP_NOFS);
    5077           0 :                         if (!raid_map) {
    5078             :                                 ret = -ENOMEM;
    5079             :                                 goto out;
    5080             :                         }
    5081             : 
    5082             :                         /* Work out the disk rotation on this stripe-set */
    5083             :                         tmp = stripe_nr;
    5084          38 :                         rot = do_div(tmp, num_stripes);
    5085             : 
    5086             :                         /* Fill in the logical address of each stripe */
    5087          38 :                         tmp = stripe_nr * nr_data_stripes(map);
    5088         266 :                         for (i = 0; i < nr_data_stripes(map); i++)
    5089         190 :                                 raid_map[(i+rot) % num_stripes] =
    5090          95 :                                         em->start + (tmp + i) * map->stripe_len;
    5091             : 
    5092          38 :                         raid_map[(i+rot) % map->num_stripes] = RAID5_P_STRIPE;
    5093          38 :                         if (map->type & BTRFS_BLOCK_GROUP_RAID6)
    5094          19 :                                 raid_map[(i+rot+1) % num_stripes] =
    5095             :                                         RAID6_Q_STRIPE;
    5096             : 
    5097          38 :                         *length = map->stripe_len;
    5098             :                         stripe_index = 0;
    5099             :                         stripe_offset = 0;
    5100             :                 } else {
    5101             :                         /*
    5102             :                          * Mirror #0 or #1 means the original data block.
    5103             :                          * Mirror #2 is RAID5 parity block.
    5104             :                          * Mirror #3 is RAID6 Q block.
    5105             :                          */
    5106          14 :                         stripe_index = do_div(stripe_nr, nr_data_stripes(map));
    5107          14 :                         if (mirror_num > 1)
    5108           0 :                                 stripe_index = nr_data_stripes(map) +
    5109             :                                                 mirror_num - 2;
    5110             : 
    5111             :                         /* We distribute the parity blocks across stripes */
    5112          14 :                         tmp = stripe_nr + stripe_index;
    5113          14 :                         stripe_index = do_div(tmp, map->num_stripes);
    5114             :                 }
    5115             :         } else {
    5116             :                 /*
    5117             :                  * after this do_div call, stripe_nr is the number of stripes
    5118             :                  * on this device we have to walk to find the data, and
    5119             :                  * stripe_index is the number of our device in the stripe array
    5120             :                  */
    5121      106789 :                 stripe_index = do_div(stripe_nr, map->num_stripes);
    5122      106789 :                 mirror_num = stripe_index + 1;
    5123             :         }
    5124      251821 :         BUG_ON(stripe_index >= map->num_stripes);
    5125             : 
    5126             :         num_alloc_stripes = num_stripes;
    5127      251821 :         if (dev_replace_is_ongoing) {
    5128       93835 :                 if (rw & (REQ_WRITE | REQ_DISCARD))
    5129        4161 :                         num_alloc_stripes <<= 1;
    5130       93835 :                 if (rw & REQ_GET_READ_MIRRORS)
    5131         760 :                         num_alloc_stripes++;
    5132             :         }
    5133      251821 :         bbio = kzalloc(btrfs_bio_size(num_alloc_stripes), GFP_NOFS);
    5134      251818 :         if (!bbio) {
    5135           0 :                 kfree(raid_map);
    5136             :                 ret = -ENOMEM;
    5137           0 :                 goto out;
    5138             :         }
    5139             :         atomic_set(&bbio->error, 0);
    5140             : 
    5141      251818 :         if (rw & REQ_DISCARD) {
    5142             :                 int factor = 0;
    5143             :                 int sub_stripes = 0;
    5144             :                 u64 stripes_per_dev = 0;
    5145             :                 u32 remaining_stripes = 0;
    5146             :                 u32 last_stripe = 0;
    5147             : 
    5148           0 :                 if (map->type &
    5149             :                     (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID10)) {
    5150           0 :                         if (map->type & BTRFS_BLOCK_GROUP_RAID0)
    5151             :                                 sub_stripes = 1;
    5152             :                         else
    5153           0 :                                 sub_stripes = map->sub_stripes;
    5154             : 
    5155           0 :                         factor = map->num_stripes / sub_stripes;
    5156           0 :                         stripes_per_dev = div_u64_rem(stripe_nr_end -
    5157             :                                                       stripe_nr_orig,
    5158             :                                                       factor,
    5159             :                                                       &remaining_stripes);
    5160           0 :                         div_u64_rem(stripe_nr_end - 1, factor, &last_stripe);
    5161           0 :                         last_stripe *= sub_stripes;
    5162             :                 }
    5163             : 
    5164           0 :                 for (i = 0; i < num_stripes; i++) {
    5165           0 :                         bbio->stripes[i].physical =
    5166           0 :                                 map->stripes[stripe_index].physical +
    5167           0 :                                 stripe_offset + stripe_nr * map->stripe_len;
    5168           0 :                         bbio->stripes[i].dev = map->stripes[stripe_index].dev;
    5169             : 
    5170           0 :                         if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
    5171             :                                          BTRFS_BLOCK_GROUP_RAID10)) {
    5172           0 :                                 bbio->stripes[i].length = stripes_per_dev *
    5173           0 :                                                           map->stripe_len;
    5174             : 
    5175           0 :                                 if (i / sub_stripes < remaining_stripes)
    5176           0 :                                         bbio->stripes[i].length +=
    5177           0 :                                                 map->stripe_len;
    5178             : 
    5179             :                                 /*
    5180             :                                  * Special for the first stripe and
    5181             :                                  * the last stripe:
    5182             :                                  *
    5183             :                                  * |-------|...|-------|
    5184             :                                  *     |----------|
    5185             :                                  *    off     end_off
    5186             :                                  */
    5187           0 :                                 if (i < sub_stripes)
    5188           0 :                                         bbio->stripes[i].length -=
    5189             :                                                 stripe_offset;
    5190             : 
    5191           0 :                                 if (stripe_index >= last_stripe &&
    5192           0 :                                     stripe_index <= (last_stripe +
    5193           0 :                                                      sub_stripes - 1))
    5194           0 :                                         bbio->stripes[i].length -=
    5195             :                                                 stripe_end_offset;
    5196             : 
    5197           0 :                                 if (i == sub_stripes - 1)
    5198             :                                         stripe_offset = 0;
    5199             :                         } else
    5200           0 :                                 bbio->stripes[i].length = *length;
    5201             : 
    5202           0 :                         stripe_index++;
    5203           0 :                         if (stripe_index == map->num_stripes) {
    5204             :                                 /* This could only happen for RAID0/10 */
    5205             :                                 stripe_index = 0;
    5206           0 :                                 stripe_nr++;
    5207             :                         }
    5208             :                 }
    5209             :         } else {
    5210      314763 :                 for (i = 0; i < num_stripes; i++) {
    5211      314763 :                         bbio->stripes[i].physical =
    5212      314763 :                                 map->stripes[stripe_index].physical +
    5213      314763 :                                 stripe_offset +
    5214      314763 :                                 stripe_nr * map->stripe_len;
    5215      314763 :                         bbio->stripes[i].dev =
    5216      314763 :                                 map->stripes[stripe_index].dev;
    5217      314763 :                         stripe_index++;
    5218             :                 }
    5219             :         }
    5220             : 
    5221      251818 :         if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) {
    5222      121001 :                 if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
    5223             :                                  BTRFS_BLOCK_GROUP_RAID10 |
    5224             :                                  BTRFS_BLOCK_GROUP_RAID5 |
    5225             :                                  BTRFS_BLOCK_GROUP_DUP)) {
    5226             :                         max_errors = 1;
    5227       58152 :                 } else if (map->type & BTRFS_BLOCK_GROUP_RAID6) {
    5228             :                         max_errors = 2;
    5229             :                 }
    5230             :         }
    5231             : 
    5232      255979 :         if (dev_replace_is_ongoing && (rw & (REQ_WRITE | REQ_DISCARD)) &&
    5233        4161 :             dev_replace->tgtdev != NULL) {
    5234             :                 int index_where_to_add;
    5235        4161 :                 u64 srcdev_devid = dev_replace->srcdev->devid;
    5236             : 
    5237             :                 /*
    5238             :                  * duplicate the write operations while the dev replace
    5239             :                  * procedure is running. Since the copying of the old disk
    5240             :                  * to the new disk takes place at run time while the
    5241             :                  * filesystem is mounted writable, the regular write
    5242             :                  * operations to the old disk have to be duplicated to go
    5243             :                  * to the new disk as well.
    5244             :                  * Note that device->missing is handled by the caller, and
    5245             :                  * that the write to the old disk is already set up in the
    5246             :                  * stripes array.
    5247             :                  */
    5248             :                 index_where_to_add = num_stripes;
    5249       12316 :                 for (i = 0; i < num_stripes; i++) {
    5250        8155 :                         if (bbio->stripes[i].dev->devid == srcdev_devid) {
    5251             :                                 /* write to new disk, too */
    5252        4234 :                                 struct btrfs_bio_stripe *new =
    5253        4234 :                                         bbio->stripes + index_where_to_add;
    5254        4234 :                                 struct btrfs_bio_stripe *old =
    5255        4234 :                                         bbio->stripes + i;
    5256             : 
    5257        4234 :                                 new->physical = old->physical;
    5258        4234 :                                 new->length = old->length;
    5259        4234 :                                 new->dev = dev_replace->tgtdev;
    5260        4234 :                                 index_where_to_add++;
    5261        4234 :                                 max_errors++;
    5262             :                         }
    5263             :                 }
    5264             :                 num_stripes = index_where_to_add;
    5265      248417 :         } else if (dev_replace_is_ongoing && (rw & REQ_GET_READ_MIRRORS) &&
    5266         760 :                    dev_replace->tgtdev != NULL) {
    5267         760 :                 u64 srcdev_devid = dev_replace->srcdev->devid;
    5268             :                 int index_srcdev = 0;
    5269             :                 int found = 0;
    5270             :                 u64 physical_of_found = 0;
    5271             : 
    5272             :                 /*
    5273             :                  * During the dev-replace procedure, the target drive can
    5274             :                  * also be used to read data in case it is needed to repair
    5275             :                  * a corrupt block elsewhere. This is possible if the
    5276             :                  * requested area is left of the left cursor. In this area,
    5277             :                  * the target drive is a full copy of the source drive.
    5278             :                  */
    5279        2158 :                 for (i = 0; i < num_stripes; i++) {
    5280        1398 :                         if (bbio->stripes[i].dev->devid == srcdev_devid) {
    5281             :                                 /*
    5282             :                                  * In case of DUP, in order to keep it
    5283             :                                  * simple, only add the mirror with the
    5284             :                                  * lowest physical address
    5285             :                                  */
    5286        1329 :                                 if (found &&
    5287             :                                     physical_of_found <=
    5288         289 :                                      bbio->stripes[i].physical)
    5289         289 :                                         continue;
    5290             :                                 index_srcdev = i;
    5291             :                                 found = 1;
    5292         751 :                                 physical_of_found = bbio->stripes[i].physical;
    5293             :                         }
    5294             :                 }
    5295         760 :                 if (found) {
    5296         751 :                         u64 length = map->stripe_len;
    5297             : 
    5298        1502 :                         if (physical_of_found + length <=
    5299         751 :                             dev_replace->cursor_left) {
    5300         508 :                                 struct btrfs_bio_stripe *tgtdev_stripe =
    5301         508 :                                         bbio->stripes + num_stripes;
    5302             : 
    5303         508 :                                 tgtdev_stripe->physical = physical_of_found;
    5304         508 :                                 tgtdev_stripe->length =
    5305         508 :                                         bbio->stripes[index_srcdev].length;
    5306         508 :                                 tgtdev_stripe->dev = dev_replace->tgtdev;
    5307             : 
    5308         508 :                                 num_stripes++;
    5309             :                         }
    5310             :                 }
    5311             :         }
    5312             : 
    5313      251818 :         *bbio_ret = bbio;
    5314      251818 :         bbio->num_stripes = num_stripes;
    5315      251818 :         bbio->max_errors = max_errors;
    5316      251818 :         bbio->mirror_num = mirror_num;
    5317             : 
    5318             :         /*
    5319             :          * this is the case that REQ_READ && dev_replace_is_ongoing &&
    5320             :          * mirror_num == num_stripes + 1 && dev_replace target drive is
    5321             :          * available as a mirror
    5322             :          */
    5323      251818 :         if (patch_the_first_stripe_for_dev_replace && num_stripes > 0) {
    5324           0 :                 WARN_ON(num_stripes > 1);
    5325           0 :                 bbio->stripes[0].dev = dev_replace->tgtdev;
    5326           0 :                 bbio->stripes[0].physical = physical_to_patch_in_first_stripe;
    5327           0 :                 bbio->mirror_num = map->num_stripes + 1;
    5328             :         }
    5329      251818 :         if (raid_map) {
    5330          38 :                 sort_parity_stripes(bbio, raid_map);
    5331          38 :                 *raid_map_ret = raid_map;
    5332             :         }
    5333             : out:
    5334     1770717 :         if (dev_replace_is_ongoing)
    5335       93835 :                 btrfs_dev_replace_unlock(dev_replace);
    5336     1770717 :         free_extent_map(em);
    5337     1772287 :         return ret;
    5338             : }
    5339             : 
    5340     1624878 : int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
    5341             :                       u64 logical, u64 *length,
    5342             :                       struct btrfs_bio **bbio_ret, int mirror_num)
    5343             : {
                               /*
                                * Convenience entry point: forwards every argument unchanged to
                                * __btrfs_map_block() and passes NULL for that function's final
                                * argument, then returns its result as-is.
                                * NOTE(review): the final argument looks like the raid_map_ret
                                * output pointer, so this wrapper never requests a RAID5/6 raid
                                * map -- confirm against the __btrfs_map_block() prototype.
                                */
    5344     1624878 :         return __btrfs_map_block(fs_info, rw, logical, length, bbio_ret,
    5345             :                                  mirror_num, NULL);
    5346             : }
    5347             : 
    5348        3684 : int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
    5349             :                      u64 chunk_start, u64 physical, u64 devid,
    5350             :                      u64 **logical, int *naddrs, int *stripe_len)
    5351             : {
                               /*
                                * Reverse mapping: for the chunk whose extent map starts at
                                * chunk_start, collect the logical byte addresses that correspond
                                * to the device offset 'physical'.
                                *
                                * devid: when non-zero, only stripes whose device has this devid
                                *        are examined; zero examines every stripe.
                                *
                                * On success returns 0 and fills in:
                                *   *logical    - zero-initialised array with room for
                                *                 map->num_stripes u64 entries, allocated here.
                                *                 It is not freed in this function; presumably
                                *                 the caller owns it -- verify against callers.
                                *   *naddrs     - number of distinct addresses stored in it.
                                *   *stripe_len - rmap_len: map->stripe_len, or for RAID5/6
                                *                 map->stripe_len * nr_data_stripes(map).
                                *
                                * Returns -EIO when no extent map is found for chunk_start or the
                                * one found does not start exactly at chunk_start.
                                *
                                * NOTE(review): an allocation failure trips BUG_ON() instead of
                                * returning -ENOMEM, and the size is an open-coded multiply.
                                */
    5352        3684 :         struct extent_map_tree *em_tree = &map_tree->map_tree;
    5353             :         struct extent_map *em;
    5354             :         struct map_lookup *map;
    5355             :         u64 *buf;
    5356             :         u64 bytenr;
    5357             :         u64 length;
    5358             :         u64 stripe_nr;
    5359             :         u64 rmap_len;
    5360             :         int i, j, nr = 0;
    5361             : 
                               /* The lookup itself is done under the tree's read lock. */
    5362        3684 :         read_lock(&em_tree->lock);
    5363        3684 :         em = lookup_extent_mapping(em_tree, chunk_start, 1);
    5364             :         read_unlock(&em_tree->lock);
    5365             : 
    5366        3684 :         if (!em) {
    5367           0 :                 printk(KERN_ERR "BTRFS: couldn't find em for chunk %Lu\n",
    5368             :                        chunk_start);
    5369           0 :                 return -EIO;
    5370             :         }
    5371             : 
                               /* The mapping found must begin exactly at the requested chunk. */
    5372        3684 :         if (em->start != chunk_start) {
    5373           0 :                 printk(KERN_ERR "BTRFS: bad chunk start, em=%Lu, wanted=%Lu\n",
    5374             :                        em->start, chunk_start);
    5375           0 :                 free_extent_map(em);
    5376           0 :                 return -EIO;
    5377             :         }
                               /* The chunk's map_lookup is carried in the em->bdev field. */
    5378        3684 :         map = (struct map_lookup *)em->bdev;
    5379             : 
    5380        3684 :         length = em->len;
    5381        3684 :         rmap_len = map->stripe_len;
    5382             : 
                               /*
                                * Scale the chunk length down to the span used on each device:
                                * divide by the number of stripes that carry distinct data
                                * (do_div() divides its first argument in place). Profiles not
                                * listed here keep the full em->len.
                                */
    5383        3684 :         if (map->type & BTRFS_BLOCK_GROUP_RAID10)
    5384           9 :                 do_div(length, map->num_stripes / map->sub_stripes);
    5385        3675 :         else if (map->type & BTRFS_BLOCK_GROUP_RAID0)
    5386          33 :                 do_div(length, map->num_stripes);
    5387        3642 :         else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
    5388             :                               BTRFS_BLOCK_GROUP_RAID6)) {
    5389          18 :                 do_div(length, nr_data_stripes(map));
    5390          18 :                 rmap_len = map->stripe_len * nr_data_stripes(map);
    5391             :         }
    5392             : 
    5393        3684 :         buf = kzalloc(sizeof(u64) * map->num_stripes, GFP_NOFS);
    5394        3684 :         BUG_ON(!buf); /* -ENOMEM */
    5395             : 
    5396        5247 :         for (i = 0; i < map->num_stripes; i++) {
                                       /* Skip stripes on other devices when a devid was given. */
    5397        5247 :                 if (devid && map->stripes[i].dev->devid != devid)
    5398           0 :                         continue;
                                       /*
                                        * Skip stripes whose on-device range
                                        * [stripe physical, stripe physical + length) does not
                                        * contain 'physical'.
                                        */
    5399        8555 :                 if (map->stripes[i].physical > physical ||
    5400        3308 :                     map->stripes[i].physical + length <= physical)
    5401        4769 :                         continue;
    5402             : 
                                       /*
                                        * Index of the stripe_len-sized unit, counted from the
                                        * start of this stripe on its device, that holds 'physical'.
                                        */
    5403         478 :                 stripe_nr = physical - map->stripes[i].physical;
    5404         478 :                 do_div(stripe_nr, map->stripe_len);
    5405             : 
                                       /*
                                        * Convert the per-device unit index into a chunk-wide one:
                                        * RAID0 uses stripe_nr * num_stripes + i, and RAID10
                                        * additionally divides that by sub_stripes.
                                        */
    5406         478 :                 if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
    5407           4 :                         stripe_nr = stripe_nr * map->num_stripes + i;
    5408           4 :                         do_div(stripe_nr, map->sub_stripes);
    5409         474 :                 } else if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
    5410          12 :                         stripe_nr = stripe_nr * map->num_stripes + i;
    5411             :                 } /* else if RAID[56], multiply by nr_data_stripes().
    5412             :                    * Alternatively, just use rmap_len below instead of
    5413             :                    * map->stripe_len */
    5414             : 
    5415         478 :                 bytenr = chunk_start + stripe_nr * rmap_len;
    5416         478 :                 WARN_ON(nr >= map->num_stripes);
                                       /* Store bytenr only if it is not already in buf[0..nr). */
    5417         500 :                 for (j = 0; j < nr; j++) {
    5418          28 :                         if (buf[j] == bytenr)
    5419             :                                 break;
    5420             :                 }
    5421         478 :                 if (j == nr) {
    5422         472 :                         WARN_ON(nr >= map->num_stripes);
    5423         472 :                         buf[nr++] = bytenr;
    5424             :                 }
    5425             :         }
    5426             : 
                               /* Publish the results through the output pointers. */
    5427        3684 :         *logical = buf;
    5428        3684 :         *naddrs = nr;
    5429        3684 :         *stripe_len = rmap_len;
    5430             : 
    5431        3684 :         free_extent_map(em);
    5432        3684 :         return 0;
    5433             : }
    5434             : 
    5435      147560 : static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio, int err)
    5436             : {
                               /*
                                * Complete @bio with status @err and then free @bbio.
                                *
                                * BTRFS_BIO_ORIG_BIO_SUBMITTED is set by btrfs_map_bio() when the
                                * original bio itself was sent to a device; in that case the bio is
                                * finished with bio_endio_nodec(), otherwise with bio_endio().
                                * NOTE(review): presumably the _nodec variant skips a completion
                                * count the block layer already dropped -- confirm against bio.c.
                                */
    5437      147560 :         if (likely(bbio->flags & BTRFS_BIO_ORIG_BIO_SUBMITTED))
    5438      147560 :                 bio_endio_nodec(bio, err);
    5439             :         else
    5440           0 :                 bio_endio(bio, err);
    5441      147557 :         kfree(bbio);
    5442      147556 : }
    5443             : 
    5444      212900 : static void btrfs_end_bio(struct bio *bio, int err)
    5445             : {
                               /*
                                * Completion callback for one stripe bio (installed as bi_end_io by
                                * submit_stripe_bio(), which also stores the btrfs_bio in
                                * bi_private).
                                *
                                * On error the btrfs_bio error count is bumped and, for -EIO and
                                * -EREMOTEIO, the per-device statistics of the failing stripe's
                                * device are updated.  When the last pending stripe completes, the
                                * original bio gets its private/end_io pointers restored and is
                                * completed with -EIO if more than max_errors stripes failed, or 0
                                * otherwise.
                                */
    5446      212900 :         struct btrfs_bio *bbio = bio->bi_private;
    5447             :         struct btrfs_device *dev = bbio->stripes[0].dev; /* reassigned below before any use */
    5448             :         int is_orig_bio = 0;
    5449             : 
    5450      212900 :         if (err) {
    5451           0 :                 atomic_inc(&bbio->error);
    5452           0 :                 if (err == -EIO || err == -EREMOTEIO) {
    5453           0 :                         unsigned int stripe_index =
    5454           0 :                                 btrfs_io_bio(bio)->stripe_index;
    5455             : 
    5456           0 :                         BUG_ON(stripe_index >= bbio->num_stripes);
    5457           0 :                         dev = bbio->stripes[stripe_index].dev;
    5458           0 :                         if (dev->bdev) { /* only account errors for devices that have a bdev */
    5459           0 :                                 if (bio->bi_rw & WRITE)
    5460             :                                         btrfs_dev_stat_inc(dev,
    5461             :                                                 BTRFS_DEV_STAT_WRITE_ERRS);
    5462             :                                 else
    5463             :                                         btrfs_dev_stat_inc(dev,
    5464             :                                                 BTRFS_DEV_STAT_READ_ERRS);
    5465           0 :                                 if ((bio->bi_rw & WRITE_FLUSH) == WRITE_FLUSH) /* counted in addition to the write error */
    5466             :                                         btrfs_dev_stat_inc(dev,
    5467             :                                                 BTRFS_DEV_STAT_FLUSH_ERRS);
    5468           0 :                                 btrfs_dev_stat_print_on_error(dev);
    5469             :                         }
    5470             :                 }
    5471             :         }
    5472             : 
    5473      212911 :         if (bio == bbio->orig_bio)
    5474             :                 is_orig_bio = 1;
    5475             : 
    5476      212911 :         btrfs_bio_counter_dec(bbio->fs_info); /* pairs with the increment in submit_stripe_bio() */
    5477             : 
    5478      425803 :         if (atomic_dec_and_test(&bbio->stripes_pending)) { /* this was the last outstanding stripe */
    5479      147558 :                 if (!is_orig_bio) {
    5480       12216 :                         bio_put(bio); /* drop the clone and finish via the original bio instead */
    5481       12216 :                         bio = bbio->orig_bio;
    5482             :                 }
    5483             : 
    5484      147558 :                 bio->bi_private = bbio->private; /* restore what btrfs_map_bio() saved */
    5485      147558 :                 bio->bi_end_io = bbio->end_io;
    5486      147558 :                 btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
    5487             :                 /* only send an error to the higher layers if it is
    5488             :                  * beyond the tolerance of the btrfs bio
    5489             :                  */
    5490      147558 :                 if (atomic_read(&bbio->error) > bbio->max_errors) {
    5491             :                         err = -EIO;
    5492             :                 } else {
    5493             :                         /*
    5494             :                          * this bio is actually up to date, we didn't
    5495             :                          * go over the max number of errors
    5496             :                          */
    5497             :                         set_bit(BIO_UPTODATE, &bio->bi_flags);
    5498             :                         err = 0;
    5499             :                 }
    5500             : 
    5501      147566 :                 btrfs_end_bbio(bbio, bio, err); /* frees bbio; it must not be touched afterwards */
    5502       65356 :         } else if (!is_orig_bio) {
    5503       53140 :                 bio_put(bio); /* other stripes still pending: just release this clone */
    5504             :         }
    5505      212911 : }
    5506             : 
    5507             : /*
    5508             :  * see run_scheduled_bios for a description of why bios are collected for
    5509             :  * async submit.
    5510             :  *
    5511             :  * This will add one bio to the pending list for a device and make sure
    5512             :  * the work struct is scheduled.
                     :  *
                     :  * Two cases never reach the pending list: a bio for a missing device
                     :  * (or one without a bdev) is completed immediately with -EIO, and a
                     :  * read is submitted directly through btrfsic_submit_bio().
                     :  *
                     :  * Writes are appended to device->pending_sync_bios when REQ_SYNC is
                     :  * set and to device->pending_bios otherwise, under device->io_lock.
    5513             :  */
    5514      105965 : static noinline void btrfs_schedule_bio(struct btrfs_root *root,
    5515             :                                         struct btrfs_device *device,
    5516             :                                         int rw, struct bio *bio)
    5517             : {
    5518             :         int should_queue = 1;
    5519             :         struct btrfs_pending_bios *pending_bios;
    5520             : 
    5521      105965 :         if (device->missing || !device->bdev) {
    5522           0 :                 bio_endio(bio, -EIO);
    5523             :                 return;
    5524             :         }
    5525             : 
    5526             :         /* don't bother with additional async steps for reads, right now */
    5527      105965 :         if (!(rw & REQ_WRITE)) {
    5528           0 :                 bio_get(bio); /* hold a reference across the submit call */
    5529           0 :                 btrfsic_submit_bio(rw, bio);
    5530           0 :                 bio_put(bio);
    5531             :                 return;
    5532             :         }
    5533             : 
    5534             :         /*
    5535             :          * nr_async_bios allows us to reliably return congestion to the
    5536             :          * higher layers.  Otherwise, the async bio makes it appear we have
    5537             :          * made progress against dirty pages when we've really just put it
    5538             :          * on a queue for later
    5539             :          */
    5540      105965 :         atomic_inc(&root->fs_info->nr_async_bios);
    5541      105966 :         WARN_ON(bio->bi_next);
    5542      105966 :         bio->bi_next = NULL; /* bi_next is used below to chain bios on the pending list */
    5543      105966 :         bio->bi_rw |= rw;
    5544             : 
    5545             :         spin_lock(&device->io_lock);
    5546      105966 :         if (bio->bi_rw & REQ_SYNC)
    5547         628 :                 pending_bios = &device->pending_sync_bios;
    5548             :         else
    5549      105338 :                 pending_bios = &device->pending_bios;
    5550             : 
    5551      105966 :         if (pending_bios->tail) /* append to the singly linked head/tail list */
    5552       97958 :                 pending_bios->tail->bi_next = bio;
    5553             : 
    5554      105966 :         pending_bios->tail = bio;
    5555      105966 :         if (!pending_bios->head)
    5556        8008 :                 pending_bios->head = bio;
    5557      105966 :         if (device->running_pending) /* presumably the worker is already draining the list -- confirm */
    5558             :                 should_queue = 0;
    5559             : 
    5560             :         spin_unlock(&device->io_lock);
    5561             : 
    5562      105966 :         if (should_queue)
    5563       36775 :                 btrfs_queue_work(root->fs_info->submit_workers,
    5564             :                                  &device->work);
    5565             : }
    5566             : 
    5567      212914 : static int bio_size_ok(struct block_device *bdev, struct bio *bio,
    5568             :                        sector_t sector)
    5569             : {
                               /*
                                * Decide whether @bio can be sent unmodified to @bdev starting at
                                * @sector.  Returns 1 if it can, 0 if the caller has to break it up.
                                *
                                * Two limits are checked: the queue's max_sectors, and, if the queue
                                * provides a merge_bvec_fn, whether that callback accepts the last
                                * bio_vec in full on top of the rest of the bio.  An empty bio
                                * triggers a WARN and is reported as OK.
                                */
    5570             :         struct bio_vec *prev;
    5571      212914 :         struct request_queue *q = bdev_get_queue(bdev);
    5572             :         unsigned int max_sectors = queue_max_sectors(q);
    5573      425828 :         struct bvec_merge_data bvm = {
    5574             :                 .bi_bdev = bdev,
    5575             :                 .bi_sector = sector,
    5576      212914 :                 .bi_rw = bio->bi_rw,
    5577             :         };
    5578             : 
    5579      212914 :         if (WARN_ON(bio->bi_vcnt == 0))
    5580             :                 return 1;
    5581             : 
    5582      212912 :         prev = &bio->bi_io_vec[bio->bi_vcnt - 1]; /* the last bio_vec of the bio */
    5583      212912 :         if (bio_sectors(bio) > max_sectors)
    5584             :                 return 0;
    5585             : 
    5586      212912 :         if (!q->merge_bvec_fn)
    5587             :                 return 1;
    5588             : 
    5589           0 :         bvm.bi_size = bio->bi_iter.bi_size - prev->bv_len; /* bio size without the last bio_vec */
    5590           0 :         if (q->merge_bvec_fn(q, &bvm, prev) < prev->bv_len) /* fewer bytes accepted than the vec holds */
    5591             :                 return 0;
    5592           0 :         return 1;
    5593             : }
    5594             : 
    5595      212912 : static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio,
    5596             :                               struct bio *bio, u64 physical, int dev_nr,
    5597             :                               int rw, int async)
    5598             : {
                               /*
                                * Point @bio at stripe @dev_nr of @bbio (device, byte offset
                                * @physical) and submit it.  btrfs_end_bio() is installed as the
                                * completion handler, with @bbio as its private data and the stripe
                                * index recorded in the btrfs_io_bio.  With @async set the bio goes
                                * through btrfs_schedule_bio(), otherwise it is submitted directly.
                                */
    5599      212912 :         struct btrfs_device *dev = bbio->stripes[dev_nr].dev;
    5600             : 
    5601      212912 :         bio->bi_private = bbio;
    5602      212912 :         btrfs_io_bio(bio)->stripe_index = dev_nr;
    5603      212912 :         bio->bi_end_io = btrfs_end_bio;
    5604      212912 :         bio->bi_iter.bi_sector = physical >> 9; /* byte offset -> 512-byte sector */
    5605             : #ifdef DEBUG
                               /*
                                * NOTE(review): this block reads bio->bi_sector and bio->bi_size,
                                * while the code above uses bio->bi_iter.bi_sector.  It looks stale
                                * and may not build with DEBUG defined -- confirm.
                                */
    5606             :         {
    5607             :                 struct rcu_string *name;
    5608             : 
    5609             :                 rcu_read_lock();
    5610             :                 name = rcu_dereference(dev->name);
    5611             :                 pr_debug("btrfs_map_bio: rw %d, sector=%llu, dev=%lu "
    5612             :                          "(%s id %llu), size=%u\n", rw,
    5613             :                          (u64)bio->bi_sector, (u_long)dev->bdev->bd_dev,
    5614             :                          name->str, dev->devid, bio->bi_size);
    5615             :                 rcu_read_unlock();
    5616             :         }
    5617             : #endif
    5618      212912 :         bio->bi_bdev = dev->bdev;
    5619             : 
    5620      212912 :         btrfs_bio_counter_inc_noblocked(root->fs_info); /* dropped again in btrfs_end_bio() */
    5621             : 
    5622      212910 :         if (async)
    5623      105964 :                 btrfs_schedule_bio(root, dev, rw, bio);
    5624             :         else
    5625      106946 :                 btrfsic_submit_bio(rw, bio);
    5626      212915 : }
    5627             : 
    5628           0 : static int breakup_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio,
    5629             :                               struct bio *first_bio, struct btrfs_device *dev,
    5630             :                               int dev_nr, int rw, int async)
    5631             : {
                               /*
                                * Send the pages of @first_bio to stripe @dev_nr as a series of
                                * freshly allocated bios, for the case where bio_size_ok() rejected
                                * the bio as a whole.
                                *
                                * Pages are added one bio_vec at a time.  When bio_add_page() takes
                                * less than the full vector, the bio built so far is submitted, the
                                * physical offset is advanced by its size and a new bio is started
                                * with the same vector.
                                *
                                * Returns 0 on success or -ENOMEM if a bio cannot be allocated.
                                * NOTE(review): on -ENOMEM, bios submitted by earlier iterations stay
                                * in flight; the only caller visible here BUG()s on failure.
                                */
    5632           0 :         struct bio_vec *bvec = first_bio->bi_io_vec;
    5633             :         struct bio *bio;
    5634           0 :         int nr_vecs = bio_get_nr_vecs(dev->bdev);
    5635           0 :         u64 physical = bbio->stripes[dev_nr].physical;
    5636             : 
    5637             : again:
    5638           0 :         bio = btrfs_bio_alloc(dev->bdev, physical >> 9, nr_vecs, GFP_NOFS);
    5639           0 :         if (!bio)
    5640             :                 return -ENOMEM;
    5641             : 
    5642           0 :         while (bvec <= (first_bio->bi_io_vec + first_bio->bi_vcnt - 1)) { /* until past the last bio_vec */
    5643           0 :                 if (bio_add_page(bio, bvec->bv_page, bvec->bv_len,
    5644           0 :                                  bvec->bv_offset) < bvec->bv_len) {
    5645           0 :                         u64 len = bio->bi_iter.bi_size;
    5646             : 
    5647           0 :                         atomic_inc(&bbio->stripes_pending); /* one more completion to wait for */
    5648           0 :                         submit_stripe_bio(root, bbio, bio, physical, dev_nr,
    5649             :                                           rw, async);
    5650           0 :                         physical += len;
    5651             :                         goto again; /* bvec is not advanced: retry it in the new bio */
    5652             :                 }
    5653           0 :                 bvec++;
    5654             :         }
    5655             : 
                               /*
                                * The final bio needs no stripes_pending increment: btrfs_map_bio()
                                * already counted one completion for this stripe.
                                */
    5656           0 :         submit_stripe_bio(root, bbio, bio, physical, dev_nr, rw, async);
    5657             :         return 0;
    5658             : }
    5659             : 
    5660           0 : static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
    5661             : {
                               /*
                                * Account one stripe of @bbio as failed without submitting any I/O
                                * for it.  If it was the last pending stripe, @bio is restored to
                                * the caller's private/end_io pointers, its sector is reset to
                                * @logical, and it is completed with -EIO (which also frees @bbio).
                                */
    5662           0 :         atomic_inc(&bbio->error);
    5663           0 :         if (atomic_dec_and_test(&bbio->stripes_pending)) {
    5664             :                 /* Should be the original bio. */
    5665           0 :                 WARN_ON(bio != bbio->orig_bio);
    5666             : 
    5667           0 :                 bio->bi_private = bbio->private;
    5668           0 :                 bio->bi_end_io = bbio->end_io;
    5669           0 :                 btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
    5670           0 :                 bio->bi_iter.bi_sector = logical >> 9; /* byte address -> 512-byte sector */
    5671             : 
    5672           0 :                 btrfs_end_bbio(bbio, bio, -EIO);
    5673             :         }
    5674           0 : }
    5675             : 
    5676      147591 : int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
    5677             :                   int mirror_num, int async_submit)
    5678             : {
                               /*
                                * Map the logical range covered by @bio onto device stripes with
                                * __btrfs_map_block() and submit one bio per stripe.
                                *
                                * Every stripe except the last gets a clone of @bio; the last stripe
                                * reuses @bio itself.  If the mapping comes back with a raid_map,
                                * the bio is handed to raid56_parity_write()/raid56_parity_recover()
                                * instead.  Stripes whose device is absent, has no bdev, or is not
                                * writeable for a write are counted as errors via bbio_error().
                                *
                                * Returns 0 once all stripes have been dispatched, or the error from
                                * __btrfs_map_block() or the raid56 helpers.  Failure to clone or to
                                * break up a bio is a BUG().
                                */
    5679             :         struct btrfs_device *dev;
    5680             :         struct bio *first_bio = bio;
    5681      147591 :         u64 logical = (u64)bio->bi_iter.bi_sector << 9; /* 512-byte sector -> byte address */
    5682             :         u64 length = 0;
    5683             :         u64 map_length;
    5684      147591 :         u64 *raid_map = NULL;
    5685             :         int ret;
    5686             :         int dev_nr = 0;
    5687             :         int total_devs = 1;
    5688      147591 :         struct btrfs_bio *bbio = NULL;
    5689             : 
    5690      147591 :         length = bio->bi_iter.bi_size;
    5691      147591 :         map_length = length;
    5692             : 
    5693      147591 :         btrfs_bio_counter_inc_blocked(root->fs_info); /* dropped on every return path below */
    5694      147590 :         ret = __btrfs_map_block(root->fs_info, rw, logical, &map_length, &bbio,
    5695             :                               mirror_num, &raid_map);
    5696      147595 :         if (ret) {
    5697           0 :                 btrfs_bio_counter_dec(root->fs_info);
    5698           0 :                 return ret;
    5699             :         }
    5700             : 
    5701      147595 :         total_devs = bbio->num_stripes;
    5702      147595 :         bbio->orig_bio = first_bio;
    5703      147595 :         bbio->private = first_bio->bi_private; /* saved so the completion path can restore them */
    5704      147595 :         bbio->end_io = first_bio->bi_end_io;
    5705      147595 :         bbio->fs_info = root->fs_info;
    5706             :         atomic_set(&bbio->stripes_pending, bbio->num_stripes); /* one completion expected per stripe */
    5707             : 
    5708      147595 :         if (raid_map) {
    5709             :                 /* In this case, map_length has been set to the length of
    5710             :                    a single stripe; not the whole write */
    5711          38 :                 if (rw & WRITE) {
    5712          38 :                         ret = raid56_parity_write(root, bio, bbio,
    5713             :                                                   raid_map, map_length);
    5714             :                 } else {
    5715           0 :                         ret = raid56_parity_recover(root, bio, bbio,
    5716             :                                                     raid_map, map_length,
    5717             :                                                     mirror_num);
    5718             :                 }
    5719             :                 /*
    5720             :                  * FIXME, replace doesn't support raid56 yet, please fix
    5721             :                  * it in the future.
    5722             :                  */
    5723          38 :                 btrfs_bio_counter_dec(root->fs_info);
    5724          38 :                 return ret;
    5725             :         }
    5726             : 
    5727      147557 :         if (map_length < length) { /* the mapping covers less than the bio: treated as fatal */
    5728           0 :                 btrfs_crit(root->fs_info, "mapping failed logical %llu bio len %llu len %llu",
    5729             :                         logical, length, map_length);
    5730           0 :                 BUG();
    5731             :         }
    5732             : 
    5733      360471 :         while (dev_nr < total_devs) {
    5734      212912 :                 dev = bbio->stripes[dev_nr].dev;
    5735      212912 :                 if (!dev || !dev->bdev || (rw & WRITE && !dev->writeable)) {
    5736           0 :                         bbio_error(bbio, first_bio, logical); /* consumes this stripe's pending count */
    5737           0 :                         dev_nr++;
    5738           0 :                         continue;
    5739             :                 }
    5740             : 
    5741             :                 /*
    5742             :                  * Check and see if we're ok with this bio based on its size
    5743             :                  * and offset with the given device.
    5744             :                  */
    5745      212916 :                 if (!bio_size_ok(dev->bdev, first_bio,
    5746      212916 :                                  bbio->stripes[dev_nr].physical >> 9)) {
    5747           0 :                         ret = breakup_stripe_bio(root, bbio, first_bio, dev,
    5748             :                                                  dev_nr, rw, async_submit);
    5749           0 :                         BUG_ON(ret);
    5750           0 :                         dev_nr++;
    5751           0 :                         continue;
    5752             :                 }
    5753             : 
    5754      212913 :                 if (dev_nr < total_devs - 1) { /* not the last stripe: submit a clone */
    5755       65356 :                         bio = btrfs_bio_clone(first_bio, GFP_NOFS);
    5756       65356 :                         BUG_ON(!bio); /* -ENOMEM */
    5757             :                 } else {
    5758             :                         bio = first_bio;
    5759      147557 :                         bbio->flags |= BTRFS_BIO_ORIG_BIO_SUBMITTED; /* checked by btrfs_end_bbio() */
    5760             :                 }
    5761             : 
    5762      212913 :                 submit_stripe_bio(root, bbio, bio,
    5763      212913 :                                   bbio->stripes[dev_nr].physical, dev_nr, rw,
    5764             :                                   async_submit);
    5765      212914 :                 dev_nr++;
    5766             :         }
    5767      147559 :         btrfs_bio_counter_dec(root->fs_info);
    5768      147559 :         return 0;
    5769             : }
    5770             : 
    5771        1973 : struct btrfs_device *btrfs_find_device(struct btrfs_fs_info *fs_info, u64 devid,
    5772             :                                        u8 *uuid, u8 *fsid)
    5773             : {
                               /*
                                * Look up a device by @devid/@uuid in fs_info->fs_devices and in
                                * each fs_devices reachable through its ->seed chain.  If @fsid is
                                * non-NULL, only fs_devices whose fsid matches are searched.
                                *
                                * Returns the first device found by __find_device(), or NULL.
                                */
    5774             :         struct btrfs_device *device;
    5775             :         struct btrfs_fs_devices *cur_devices;
    5776             : 
    5777        1973 :         cur_devices = fs_info->fs_devices;
    5778        3970 :         while (cur_devices) {
    5779        2229 :                 if (!fsid ||
    5780         256 :                     !memcmp(cur_devices->fsid, fsid, BTRFS_UUID_SIZE)) {
    5781        1973 :                         device = __find_device(&cur_devices->devices,
    5782             :                                                devid, uuid);
    5783        1973 :                         if (device)
    5784             :                                 return device;
    5785             :                 }
    5786          24 :                 cur_devices = cur_devices->seed; /* move on to the next seed fs_devices, if any */
    5787             :         }
    5788             :         return NULL;
    5789             : }
    5790             : 
    5791           0 : static struct btrfs_device *add_missing_dev(struct btrfs_root *root,
    5792             :                                             u64 devid, u8 *dev_uuid)
    5793             : {
                               /*
                                * Create a placeholder btrfs_device for @devid/@dev_uuid, flag it as
                                * missing and link it into the filesystem's device list, updating
                                * the num_devices and missing_devices counters.
                                *
                                * Returns the new device, or NULL if allocation failed.  The error
                                * code from btrfs_alloc_device() is not passed on to the caller.
                                */
    5794             :         struct btrfs_device *device;
    5795           0 :         struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
    5796             : 
    5797           0 :         device = btrfs_alloc_device(NULL, &devid, dev_uuid); /* fs_info may be NULL: devid is given */
    5798           0 :         if (IS_ERR(device))
    5799             :                 return NULL;
    5800             : 
    5801           0 :         list_add(&device->dev_list, &fs_devices->devices);
    5802           0 :         device->fs_devices = fs_devices;
    5803           0 :         fs_devices->num_devices++;
    5804             : 
    5805           0 :         device->missing = 1;
    5806           0 :         fs_devices->missing_devices++;
    5807             : 
    5808             :         return device;
    5809             : }
    5810             : 
    5811             : /**
    5812             :  * btrfs_alloc_device - allocate struct btrfs_device
    5813             :  * @fs_info:    used only for generating a new devid, can be NULL if
    5814             :  *              devid is provided (i.e. @devid != NULL).
    5815             :  * @devid:      a pointer to devid for this device.  If NULL a new devid
    5816             :  *              is generated.
    5817             :  * @uuid:       a pointer to UUID for this device.  If NULL a new UUID
    5818             :  *              is generated.
    5819             :  *
    5820             :  * Return: a pointer to a new &struct btrfs_device on success; ERR_PTR()
    5821             :  * on error.  Returned struct is not linked onto any lists and can be
    5822             :  * destroyed with kfree() right away.
                     :  *
                     :  * Errors are -EINVAL (with a WARN) when both @devid and @fs_info are
                     :  * NULL, whatever __alloc_device() returns on failure, and the error
                     :  * code from find_next_devid() when a devid has to be generated.
    5823             :  */
    5824         382 : struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
    5825             :                                         const u64 *devid,
    5826             :                                         const u8 *uuid)
    5827             : {
    5828             :         struct btrfs_device *dev;
    5829             :         u64 tmp;
    5830             : 
    5831         382 :         if (WARN_ON(!devid && !fs_info)) /* no way to obtain a devid */
    5832             :                 return ERR_PTR(-EINVAL);
    5833             : 
    5834         382 :         dev = __alloc_device();
    5835         382 :         if (IS_ERR(dev))
    5836             :                 return dev;
    5837             : 
    5838         382 :         if (devid)
    5839         382 :                 tmp = *devid;
    5840             :         else {
    5841             :                 int ret;
    5842             : 
    5843           0 :                 ret = find_next_devid(fs_info, &tmp);
    5844           0 :                 if (ret) {
    5845           0 :                         kfree(dev); /* nothing else is attached to dev yet */
    5846           0 :                         return ERR_PTR(ret);
    5847             :                 }
    5848             :         }
    5849         382 :         dev->devid = tmp;
    5850             : 
    5851         382 :         if (uuid)
    5852         374 :                 memcpy(dev->uuid, uuid, BTRFS_UUID_SIZE);
    5853             :         else
    5854           8 :                 generate_random_uuid(dev->uuid);
    5855             : 
    5856         382 :         btrfs_init_work(&dev->work, btrfs_submit_helper, /* work item queued by btrfs_schedule_bio() */
    5857             :                         pending_bios_fn, NULL, NULL);
    5858             : 
    5859         382 :         return dev;
    5860             : }
    5861             : 
    5862        1572 : static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
    5863             :                           struct extent_buffer *leaf,
    5864             :                           struct btrfs_chunk *chunk)
    5865             : {
                               /*
                                * Build the in-memory mapping for one chunk item and insert it into
                                * fs_info->mapping_tree.  The chunk's logical start is key->offset;
                                * its length, geometry and stripes are read from @chunk in @leaf.
                                *
                                * Each stripe is resolved to a btrfs_device by devid and UUID.  If a
                                * device is not found, a placeholder is created with
                                * add_missing_dev(), but only when the DEGRADED option is set.
                                *
                                * Returns 0 on success or if the logical address is already mapped,
                                * -ENOMEM on allocation failure, and -EIO when a stripe's device is
                                * unavailable.  Failure to insert the mapping is a BUG().
                                */
    5866        1572 :         struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
    5867             :         struct map_lookup *map;
    5868             :         struct extent_map *em;
    5869             :         u64 logical;
    5870             :         u64 length;
    5871             :         u64 devid;
    5872             :         u8 uuid[BTRFS_UUID_SIZE];
    5873             :         int num_stripes;
    5874             :         int ret;
    5875             :         int i;
    5876             : 
    5877        1572 :         logical = key->offset;
    5878             :         length = btrfs_chunk_length(leaf, chunk);
    5879             : 
    5880        1572 :         read_lock(&map_tree->map_tree.lock);
    5881        1572 :         em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
    5882             :         read_unlock(&map_tree->map_tree.lock);
    5883             : 
    5884             :         /* already mapped? */
    5885        1572 :         if (em && em->start <= logical && em->start + em->len > logical) {
    5886         431 :                 free_extent_map(em);
    5887         431 :                 return 0;
    5888        1141 :         } else if (em) {
    5889           0 :                 free_extent_map(em); /* lookup result does not cover logical: drop it */
    5890             :         }
    5891             : 
    5892        1141 :         em = alloc_extent_map();
    5893        1141 :         if (!em)
    5894             :                 return -ENOMEM;
    5895        1141 :         num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
    5896        1141 :         map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
    5897        1141 :         if (!map) {
    5898           0 :                 free_extent_map(em);
    5899           0 :                 return -ENOMEM;
    5900             :         }
    5901             : 
    5902             :         set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags);
    5903        1141 :         em->bdev = (struct block_device *)map; /* the bdev field carries the map_lookup pointer */
    5904        1141 :         em->start = logical;
    5905        1141 :         em->len = length;
    5906        1141 :         em->orig_start = 0;
    5907        1141 :         em->block_start = 0;
    5908        1141 :         em->block_len = em->len;
    5909             : 
    5910        1141 :         map->num_stripes = num_stripes;
    5911        1141 :         map->io_width = btrfs_chunk_io_width(leaf, chunk);
    5912        1141 :         map->io_align = btrfs_chunk_io_align(leaf, chunk);
    5913        1141 :         map->sector_size = btrfs_chunk_sector_size(leaf, chunk);
    5914        1141 :         map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
    5915        1141 :         map->type = btrfs_chunk_type(leaf, chunk);
    5916        1141 :         map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
    5917        2757 :         for (i = 0; i < num_stripes; i++) {
    5918        1616 :                 map->stripes[i].physical =
    5919             :                         btrfs_stripe_offset_nr(leaf, chunk, i);
    5920             :                 devid = btrfs_stripe_devid_nr(leaf, chunk, i);
    5921        1616 :                 read_extent_buffer(leaf, uuid, (unsigned long)
    5922        1616 :                                    btrfs_stripe_dev_uuid_nr(chunk, i),
    5923             :                                    BTRFS_UUID_SIZE);
    5924        1616 :                 map->stripes[i].dev = btrfs_find_device(root->fs_info, devid,
    5925             :                                                         uuid, NULL);
    5926        1616 :                 if (!map->stripes[i].dev && !btrfs_test_opt(root, DEGRADED)) {
                                               /*
                                                * NOTE(review): map is not freed explicitly on these
                                                * error paths; presumably free_extent_map() releases
                                                * it because EXTENT_FLAG_FS_MAPPING is set -- confirm.
                                                */
    5927           0 :                         free_extent_map(em);
    5928           0 :                         return -EIO;
    5929             :                 }
    5930        1616 :                 if (!map->stripes[i].dev) {
    5931           0 :                         map->stripes[i].dev =
    5932           0 :                                 add_missing_dev(root, devid, uuid);
    5933           0 :                         if (!map->stripes[i].dev) {
    5934           0 :                                 free_extent_map(em);
    5935           0 :                                 return -EIO;
    5936             :                         }
    5937             :                 }
    5938        1616 :                 map->stripes[i].dev->in_fs_metadata = 1; /* device is referenced by a chunk */
    5939             :         }
    5940             : 
    5941        1141 :         write_lock(&map_tree->map_tree.lock);
    5942        1141 :         ret = add_extent_mapping(&map_tree->map_tree, em, 0);
    5943             :         write_unlock(&map_tree->map_tree.lock);
    5944        1141 :         BUG_ON(ret); /* Tree corruption */
    5945        1141 :         free_extent_map(em); /* NOTE(review): presumably drops only the local reference -- confirm */
    5946             : 
    5947        1141 :         return 0;
    5948             : }
    5949             : 
    5950         248 : static void fill_device_from_item(struct extent_buffer *leaf,
    5951             :                                  struct btrfs_dev_item *dev_item,
    5952             :                                  struct btrfs_device *device)
    5953             : {
                               /*
                                * Copy the fields of the on-disk device item @dev_item (stored in
                                * @leaf) into the in-memory @device: devid, sizes, type, I/O
                                * geometry and UUID.  The device is also marked as not being a
                                * dev-replace target.
                                */
    5954             :         unsigned long ptr;
    5955             : 
    5956         248 :         device->devid = btrfs_device_id(leaf, dev_item);
    5957         248 :         device->disk_total_bytes = btrfs_device_total_bytes(leaf, dev_item);
    5958         248 :         device->total_bytes = device->disk_total_bytes; /* both start out as the on-disk value */
    5959         248 :         device->bytes_used = btrfs_device_bytes_used(leaf, dev_item);
    5960         248 :         device->type = btrfs_device_type(leaf, dev_item);
    5961         248 :         device->io_align = btrfs_device_io_align(leaf, dev_item);
    5962         248 :         device->io_width = btrfs_device_io_width(leaf, dev_item);
    5963         248 :         device->sector_size = btrfs_device_sector_size(leaf, dev_item);
    5964         248 :         WARN_ON(device->devid == BTRFS_DEV_REPLACE_DEVID); /* that devid is not expected in a dev item */
    5965         248 :         device->is_tgtdev_for_dev_replace = 0;
    5966             : 
    5967             :         ptr = btrfs_device_uuid(dev_item); /* used as the offset argument of read_extent_buffer() */
    5968         248 :         read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
    5969         248 : }
    5970             : 
    5971           0 : static int open_seed_devices(struct btrfs_root *root, u8 *fsid)
    5972             : {
    5973             :         struct btrfs_fs_devices *fs_devices;
    5974             :         int ret;
    5975             : 
    5976           0 :         BUG_ON(!mutex_is_locked(&uuid_mutex));
    5977             : 
    5978           0 :         fs_devices = root->fs_info->fs_devices->seed;
    5979           0 :         while (fs_devices) {
    5980           0 :                 if (!memcmp(fs_devices->fsid, fsid, BTRFS_UUID_SIZE)) {
    5981             :                         ret = 0;
    5982             :                         goto out;
    5983             :                 }
    5984           0 :                 fs_devices = fs_devices->seed;
    5985             :         }
    5986             : 
    5987           0 :         fs_devices = find_fsid(fsid);
    5988           0 :         if (!fs_devices) {
    5989             :                 ret = -ENOENT;
    5990             :                 goto out;
    5991             :         }
    5992             : 
    5993           0 :         fs_devices = clone_fs_devices(fs_devices);
    5994           0 :         if (IS_ERR(fs_devices)) {
    5995           0 :                 ret = PTR_ERR(fs_devices);
    5996             :                 goto out;
    5997             :         }
    5998             : 
    5999           0 :         ret = __btrfs_open_devices(fs_devices, FMODE_READ,
    6000           0 :                                    root->fs_info->bdev_holder);
    6001           0 :         if (ret) {
    6002           0 :                 free_fs_devices(fs_devices);
    6003             :                 goto out;
    6004             :         }
    6005             : 
    6006           0 :         if (!fs_devices->seeding) {
    6007           0 :                 __btrfs_close_devices(fs_devices);
    6008           0 :                 free_fs_devices(fs_devices);
    6009             :                 ret = -EINVAL;
    6010             :                 goto out;
    6011             :         }
    6012             : 
    6013           0 :         fs_devices->seed = root->fs_info->fs_devices->seed;
    6014           0 :         root->fs_info->fs_devices->seed = fs_devices;
    6015             : out:
    6016           0 :         return ret;
    6017             : }
    6018             : 
    6019         248 : static int read_one_dev(struct btrfs_root *root,
    6020             :                         struct extent_buffer *leaf,
    6021             :                         struct btrfs_dev_item *dev_item)
    6022             : {
    6023             :         struct btrfs_device *device;
    6024             :         u64 devid;
    6025             :         int ret;
    6026             :         u8 fs_uuid[BTRFS_UUID_SIZE];
    6027             :         u8 dev_uuid[BTRFS_UUID_SIZE];
    6028             : 
    6029             :         devid = btrfs_device_id(leaf, dev_item);
    6030         248 :         read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item),
    6031             :                            BTRFS_UUID_SIZE);
    6032         248 :         read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item),
    6033             :                            BTRFS_UUID_SIZE);
    6034             : 
    6035         248 :         if (memcmp(fs_uuid, root->fs_info->fsid, BTRFS_UUID_SIZE)) {
    6036           0 :                 ret = open_seed_devices(root, fs_uuid);
    6037           0 :                 if (ret && !btrfs_test_opt(root, DEGRADED))
    6038             :                         return ret;
    6039             :         }
    6040             : 
    6041         248 :         device = btrfs_find_device(root->fs_info, devid, dev_uuid, fs_uuid);
    6042         248 :         if (!device || !device->bdev) {
    6043           0 :                 if (!btrfs_test_opt(root, DEGRADED))
    6044             :                         return -EIO;
    6045             : 
    6046           0 :                 if (!device) {
    6047           0 :                         btrfs_warn(root->fs_info, "devid %llu missing", devid);
    6048           0 :                         device = add_missing_dev(root, devid, dev_uuid);
    6049           0 :                         if (!device)
    6050             :                                 return -ENOMEM;
    6051           0 :                 } else if (!device->missing) {
    6052             :                         /*
    6053             :                          * this happens when a device that was properly set up
    6054             :                          * in the device info lists suddenly goes bad.
    6055             :                          * device->bdev is NULL, and so we have to set
    6056             :                          * device->missing to one here
    6057             :                          */
    6058           0 :                         root->fs_info->fs_devices->missing_devices++;
    6059           0 :                         device->missing = 1;
    6060             :                 }
    6061             :         }
    6062             : 
    6063         248 :         if (device->fs_devices != root->fs_info->fs_devices) {
    6064           0 :                 BUG_ON(device->writeable);
    6065           0 :                 if (device->generation !=
    6066             :                     btrfs_device_generation(leaf, dev_item))
    6067             :                         return -EINVAL;
    6068             :         }
    6069             : 
    6070         248 :         fill_device_from_item(leaf, dev_item, device);
    6071         248 :         device->in_fs_metadata = 1;
    6072         248 :         if (device->writeable && !device->is_tgtdev_for_dev_replace) {
    6073         248 :                 device->fs_devices->total_rw_bytes += device->total_bytes;
    6074         248 :                 spin_lock(&root->fs_info->free_chunk_lock);
    6075         496 :                 root->fs_info->free_chunk_space += device->total_bytes -
    6076         248 :                         device->bytes_used;
    6077         248 :                 spin_unlock(&root->fs_info->free_chunk_lock);
    6078             :         }
    6079             :         ret = 0;
    6080             :         return ret;
    6081             : }
    6082             : 
    6083         221 : int btrfs_read_sys_array(struct btrfs_root *root)
    6084             : {
    6085         221 :         struct btrfs_super_block *super_copy = root->fs_info->super_copy;
    6086             :         struct extent_buffer *sb;
    6087             :         struct btrfs_disk_key *disk_key;
    6088             :         struct btrfs_chunk *chunk;
    6089             :         u8 *ptr;
    6090             :         unsigned long sb_ptr;
    6091             :         int ret = 0;
    6092             :         u32 num_stripes;
    6093             :         u32 array_size;
    6094             :         u32 len = 0;
    6095             :         u32 cur;
    6096             :         struct btrfs_key key;
    6097             : 
    6098         221 :         sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET,
    6099             :                                           BTRFS_SUPER_INFO_SIZE);
    6100         221 :         if (!sb)
    6101             :                 return -ENOMEM;
    6102         221 :         btrfs_set_buffer_uptodate(sb);
    6103             :         btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0);
    6104             :         /*
    6105             :          * The sb extent buffer is artificial and just used to read the system array.
    6106             :          * btrfs_set_buffer_uptodate() call does not properly mark all its
    6107             :          * pages up-to-date when the page is larger: extent does not cover the
    6108             :          * whole page and consequently check_page_uptodate does not find all
    6109             :          * the page's extents up-to-date (the hole beyond sb),
    6110             :          * write_extent_buffer then triggers a WARN_ON.
    6111             :          *
    6112             :          * Regular short extents go through mark_extent_buffer_dirty/writeback cycle,
    6113             :          * but sb spans only this function. Add an explicit SetPageUptodate call
    6114             :          * to silence the warning e.g. on PowerPC 64.
    6115             :          */
    6116             :         if (PAGE_CACHE_SIZE > BTRFS_SUPER_INFO_SIZE)
    6117             :                 SetPageUptodate(sb->pages[0]);
    6118             : 
    6119         221 :         write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
    6120             :         array_size = btrfs_super_sys_array_size(super_copy);
    6121             : 
    6122         221 :         ptr = super_copy->sys_chunk_array;
    6123             :         sb_ptr = offsetof(struct btrfs_super_block, sys_chunk_array);
    6124             :         cur = 0;
    6125             : 
    6126         873 :         while (cur < array_size) {
    6127             :                 disk_key = (struct btrfs_disk_key *)ptr;
    6128             :                 btrfs_disk_key_to_cpu(&key, disk_key);
    6129             : 
    6130         431 :                 len = sizeof(*disk_key); ptr += len;
    6131         431 :                 sb_ptr += len;
    6132         431 :                 cur += len;
    6133             : 
    6134         431 :                 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
    6135         431 :                         chunk = (struct btrfs_chunk *)sb_ptr;
    6136         431 :                         ret = read_one_chunk(root, &key, sb, chunk);
    6137         431 :                         if (ret)
    6138             :                                 break;
    6139             :                         num_stripes = btrfs_chunk_num_stripes(sb, chunk);
    6140         862 :                         len = btrfs_chunk_item_size(num_stripes);
    6141             :                 } else {
    6142             :                         ret = -EIO;
    6143             :                         break;
    6144             :                 }
    6145         431 :                 ptr += len;
    6146         431 :                 sb_ptr += len;
    6147         431 :                 cur += len;
    6148             :         }
    6149         221 :         free_extent_buffer(sb);
    6150         221 :         return ret;
    6151             : }
    6152             : 
    6153         663 : int btrfs_read_chunk_tree(struct btrfs_root *root)
    6154             : {
    6155             :         struct btrfs_path *path;
    6156        1610 :         struct extent_buffer *leaf;
    6157             :         struct btrfs_key key;
    6158             :         struct btrfs_key found_key;
    6159             :         int ret;
    6160             :         int slot;
    6161             : 
    6162         221 :         root = root->fs_info->chunk_root;
    6163             : 
    6164         221 :         path = btrfs_alloc_path();
    6165         221 :         if (!path)
    6166             :                 return -ENOMEM;
    6167             : 
    6168         221 :         mutex_lock(&uuid_mutex);
    6169             :         lock_chunks(root);
    6170             : 
    6171             :         /*
    6172             :          * Read all device items, and then all the chunk items. All
    6173             :          * device items are found before any chunk item (their object id
    6174             :          * is smaller than the lowest possible object id for a chunk
    6175             :          * item - BTRFS_FIRST_CHUNK_TREE_OBJECTID).
    6176             :          */
    6177         221 :         key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
    6178         221 :         key.offset = 0;
    6179         221 :         key.type = 0;
    6180         221 :         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
    6181         221 :         if (ret < 0)
    6182             :                 goto error;
    6183             :         while (1) {
    6184        1610 :                 leaf = path->nodes[0];
    6185        1610 :                 slot = path->slots[0];
    6186        3220 :                 if (slot >= btrfs_header_nritems(leaf)) {
    6187         221 :                         ret = btrfs_next_leaf(root, path);
    6188         221 :                         if (ret == 0)
    6189           0 :                                 continue;
    6190         221 :                         if (ret < 0)
    6191             :                                 goto error;
    6192             :                         break;
    6193             :                 }
    6194        1389 :                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
    6195        1389 :                 if (found_key.type == BTRFS_DEV_ITEM_KEY) {
    6196             :                         struct btrfs_dev_item *dev_item;
    6197         248 :                         dev_item = btrfs_item_ptr(leaf, slot,
    6198             :                                                   struct btrfs_dev_item);
    6199         248 :                         ret = read_one_dev(root, leaf, dev_item);
    6200         248 :                         if (ret)
    6201             :                                 goto error;
    6202        1141 :                 } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
    6203             :                         struct btrfs_chunk *chunk;
    6204        1141 :                         chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
    6205        1141 :                         ret = read_one_chunk(root, &found_key, leaf, chunk);
    6206        1141 :                         if (ret)
    6207             :                                 goto error;
    6208             :                 }
    6209        1389 :                 path->slots[0]++;
    6210             :         }
    6211             :         ret = 0;
    6212             : error:
    6213             :         unlock_chunks(root);
    6214         221 :         mutex_unlock(&uuid_mutex);
    6215             : 
    6216         221 :         btrfs_free_path(path);
    6217         221 :         return ret;
    6218             : }
    6219             : 
    6220         221 : void btrfs_init_devices_late(struct btrfs_fs_info *fs_info)
    6221             : {
    6222         221 :         struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
    6223             :         struct btrfs_device *device;
    6224             : 
    6225         663 :         while (fs_devices) {
    6226         221 :                 mutex_lock(&fs_devices->device_list_mutex);
    6227         469 :                 list_for_each_entry(device, &fs_devices->devices, dev_list)
    6228         248 :                         device->dev_root = fs_info->dev_root;
    6229         221 :                 mutex_unlock(&fs_devices->device_list_mutex);
    6230             : 
    6231         221 :                 fs_devices = fs_devices->seed;
    6232             :         }
    6233         221 : }
    6234             : 
    6235             : static void __btrfs_reset_dev_stats(struct btrfs_device *dev)
    6236             : {
    6237             :         int i;
    6238             : 
    6239         595 :         for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
    6240             :                 btrfs_dev_stat_reset(dev, i);
    6241             : }
    6242             : 
    6243         221 : int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info)
    6244             : {
    6245             :         struct btrfs_key key;
    6246             :         struct btrfs_key found_key;
    6247         221 :         struct btrfs_root *dev_root = fs_info->dev_root;
    6248         221 :         struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
    6249             :         struct extent_buffer *eb;
    6250             :         int slot;
    6251             :         int ret = 0;
    6252             :         struct btrfs_device *device;
    6253             :         struct btrfs_path *path = NULL;
    6254             :         int i;
    6255             : 
    6256         221 :         path = btrfs_alloc_path();
    6257         221 :         if (!path) {
    6258             :                 ret = -ENOMEM;
    6259             :                 goto out;
    6260             :         }
    6261             : 
    6262         221 :         mutex_lock(&fs_devices->device_list_mutex);
    6263         469 :         list_for_each_entry(device, &fs_devices->devices, dev_list) {
    6264             :                 int item_size;
    6265             :                 struct btrfs_dev_stats_item *ptr;
    6266             : 
    6267         248 :                 key.objectid = 0;
    6268         248 :                 key.type = BTRFS_DEV_STATS_KEY;
    6269         248 :                 key.offset = device->devid;
    6270         248 :                 ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0);
    6271         248 :                 if (ret) {
    6272             :                         __btrfs_reset_dev_stats(device);
    6273         119 :                         device->dev_stats_valid = 1;
    6274         119 :                         btrfs_release_path(path);
    6275         119 :                         continue;
    6276             :                 }
    6277         129 :                 slot = path->slots[0];
    6278         129 :                 eb = path->nodes[0];
    6279         129 :                 btrfs_item_key_to_cpu(eb, &found_key, slot);
    6280         129 :                 item_size = btrfs_item_size_nr(eb, slot);
    6281             : 
    6282         129 :                 ptr = btrfs_item_ptr(eb, slot,
    6283             :                                      struct btrfs_dev_stats_item);
    6284             : 
    6285         903 :                 for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) {
    6286         645 :                         if (item_size >= (1 + i) * sizeof(__le64))
    6287             :                                 btrfs_dev_stat_set(device, i,
    6288             :                                         btrfs_dev_stats_value(eb, ptr, i));
    6289             :                         else
    6290             :                                 btrfs_dev_stat_reset(device, i);
    6291             :                 }
    6292             : 
    6293         129 :                 device->dev_stats_valid = 1;
    6294         129 :                 btrfs_dev_stat_print_on_load(device);
    6295         129 :                 btrfs_release_path(path);
    6296             :         }
    6297         221 :         mutex_unlock(&fs_devices->device_list_mutex);
    6298             : 
    6299             : out:
    6300         221 :         btrfs_free_path(path);
    6301         221 :         return ret < 0 ? ret : 0;
    6302             : }
    6303             : 
    6304         221 : static int update_dev_stat_item(struct btrfs_trans_handle *trans,
    6305             :                                 struct btrfs_root *dev_root,
    6306             :                                 struct btrfs_device *device)
    6307             : {
    6308             :         struct btrfs_path *path;
    6309             :         struct btrfs_key key;
    6310             :         struct extent_buffer *eb;
    6311             :         struct btrfs_dev_stats_item *ptr;
    6312             :         int ret;
    6313             :         int i;
    6314             : 
    6315         221 :         key.objectid = 0;
    6316         221 :         key.type = BTRFS_DEV_STATS_KEY;
    6317         221 :         key.offset = device->devid;
    6318             : 
    6319         221 :         path = btrfs_alloc_path();
    6320         221 :         BUG_ON(!path);
    6321         221 :         ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1);
    6322         221 :         if (ret < 0) {
    6323           0 :                 printk_in_rcu(KERN_WARNING "BTRFS: "
    6324             :                         "error %d while searching for dev_stats item for device %s!\n",
    6325             :                               ret, rcu_str_deref(device->name));
    6326             :                 goto out;
    6327             :         }
    6328             : 
    6329         323 :         if (ret == 0 &&
    6330         102 :             btrfs_item_size_nr(path->nodes[0], path->slots[0]) < sizeof(*ptr)) {
    6331             :                 /* need to delete old one and insert a new one */
    6332             :                 ret = btrfs_del_item(trans, dev_root, path);
    6333           0 :                 if (ret != 0) {
    6334           0 :                         printk_in_rcu(KERN_WARNING "BTRFS: "
    6335             :                                 "delete too small dev_stats item for device %s failed %d!\n",
    6336             :                                       rcu_str_deref(device->name), ret);
    6337             :                         goto out;
    6338             :                 }
    6339             :                 ret = 1;
    6340             :         }
    6341             : 
    6342         221 :         if (ret == 1) {
    6343             :                 /* need to insert a new item */
    6344         119 :                 btrfs_release_path(path);
    6345             :                 ret = btrfs_insert_empty_item(trans, dev_root, path,
    6346             :                                               &key, sizeof(*ptr));
    6347         119 :                 if (ret < 0) {
    6348           0 :                         printk_in_rcu(KERN_WARNING "BTRFS: "
    6349             :                                           "insert dev_stats item for device %s failed %d!\n",
    6350             :                                       rcu_str_deref(device->name), ret);
    6351             :                         goto out;
    6352             :                 }
    6353             :         }
    6354             : 
    6355         221 :         eb = path->nodes[0];
    6356         442 :         ptr = btrfs_item_ptr(eb, path->slots[0], struct btrfs_dev_stats_item);
    6357        1326 :         for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
    6358        1105 :                 btrfs_set_dev_stats_value(eb, ptr, i,
    6359             :                                           btrfs_dev_stat_read(device, i));
    6360         221 :         btrfs_mark_buffer_dirty(eb);
    6361             : 
    6362             : out:
    6363         221 :         btrfs_free_path(path);
    6364         221 :         return ret;
    6365             : }
    6366             : 
    6367             : /*
    6368             :  * called from commit_transaction. Writes all changed device stats to disk.
    6369             :  */
    6370        2098 : int btrfs_run_dev_stats(struct btrfs_trans_handle *trans,
    6371             :                         struct btrfs_fs_info *fs_info)
    6372             : {
    6373        2098 :         struct btrfs_root *dev_root = fs_info->dev_root;
    6374        2098 :         struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
    6375             :         struct btrfs_device *device;
    6376             :         int ret = 0;
    6377             : 
    6378        2098 :         mutex_lock(&fs_devices->device_list_mutex);
    6379        4277 :         list_for_each_entry(device, &fs_devices->devices, dev_list) {
    6380        2179 :                 if (!device->dev_stats_valid || !device->dev_stats_dirty)
    6381        1958 :                         continue;
    6382             : 
    6383         221 :                 ret = update_dev_stat_item(trans, dev_root, device);
    6384         221 :                 if (!ret)
    6385         221 :                         device->dev_stats_dirty = 0;
    6386             :         }
    6387        2098 :         mutex_unlock(&fs_devices->device_list_mutex);
    6388             : 
    6389        2098 :         return ret;
    6390             : }
    6391             : 
    6392           0 : void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index)
    6393             : {
    6394             :         btrfs_dev_stat_inc(dev, index);
    6395           0 :         btrfs_dev_stat_print_on_error(dev);
    6396           0 : }
    6397             : 
    6398           0 : static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)
    6399             : {
    6400           0 :         if (!dev->dev_stats_valid)
    6401           0 :                 return;
    6402           0 :         printk_ratelimited_in_rcu(KERN_ERR "BTRFS: "
    6403             :                            "bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
    6404             :                            rcu_str_deref(dev->name),
    6405             :                            btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
    6406             :                            btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
    6407             :                            btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
    6408             :                            btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS),
    6409             :                            btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_GENERATION_ERRS));
    6410             : }
    6411             : 
    6412         129 : static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev)
    6413             : {
    6414             :         int i;
    6415             : 
    6416         774 :         for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
    6417         645 :                 if (btrfs_dev_stat_read(dev, i) != 0)
    6418             :                         break;
    6419         129 :         if (i == BTRFS_DEV_STAT_VALUES_MAX)
    6420         129 :                 return; /* all values == 0, suppress message */
    6421             : 
    6422           0 :         printk_in_rcu(KERN_INFO "BTRFS: "
    6423             :                    "bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
    6424             :                rcu_str_deref(dev->name),
    6425             :                btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
    6426             :                btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
    6427             :                btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
    6428             :                btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS),
    6429             :                btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_GENERATION_ERRS));
    6430             : }
    6431             : 
    6432          10 : int btrfs_get_dev_stats(struct btrfs_root *root,
    6433             :                         struct btrfs_ioctl_get_dev_stats *stats)
    6434             : {
    6435             :         struct btrfs_device *dev;
    6436          10 :         struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
    6437             :         int i;
    6438             : 
    6439          10 :         mutex_lock(&fs_devices->device_list_mutex);
    6440          10 :         dev = btrfs_find_device(root->fs_info, stats->devid, NULL, NULL);
    6441          10 :         mutex_unlock(&fs_devices->device_list_mutex);
    6442             : 
    6443          10 :         if (!dev) {
    6444           3 :                 btrfs_warn(root->fs_info, "get dev_stats failed, device not found");
    6445           3 :                 return -ENODEV;
    6446           7 :         } else if (!dev->dev_stats_valid) {
    6447           0 :                 btrfs_warn(root->fs_info, "get dev_stats failed, not yet valid");
    6448           0 :                 return -ENODEV;
    6449           7 :         } else if (stats->flags & BTRFS_DEV_STATS_RESET) {
    6450           0 :                 for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) {
    6451           0 :                         if (stats->nr_items > i)
    6452           0 :                                 stats->values[i] =
    6453             :                                         btrfs_dev_stat_read_and_reset(dev, i);
    6454             :                         else
    6455             :                                 btrfs_dev_stat_reset(dev, i);
    6456             :                 }
    6457             :         } else {
    6458          35 :                 for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
    6459          35 :                         if (stats->nr_items > i)
    6460          35 :                                 stats->values[i] = btrfs_dev_stat_read(dev, i);
    6461             :         }
    6462           7 :         if (stats->nr_items > BTRFS_DEV_STAT_VALUES_MAX)
    6463           0 :                 stats->nr_items = BTRFS_DEV_STAT_VALUES_MAX;
    6464             :         return 0;
    6465             : }
    6466             : 
    6467           8 : int btrfs_scratch_superblock(struct btrfs_device *device)
    6468             : {
    6469             :         struct buffer_head *bh;
    6470             :         struct btrfs_super_block *disk_super;
    6471             : 
    6472           8 :         bh = btrfs_read_dev_super(device->bdev);
    6473           8 :         if (!bh)
    6474             :                 return -EINVAL;
    6475           8 :         disk_super = (struct btrfs_super_block *)bh->b_data;
    6476             : 
    6477           8 :         memset(&disk_super->magic, 0, sizeof(disk_super->magic));
    6478             :         set_buffer_dirty(bh);
    6479           8 :         sync_dirty_buffer(bh);
    6480             :         brelse(bh);
    6481             : 
    6482             :         return 0;
    6483             : }

Generated by: LCOV version 1.10