Line data Source code
1 : /*
2 : * Copyright (C) 2011 STRATO. All rights reserved.
3 : *
4 : * This program is free software; you can redistribute it and/or
5 : * modify it under the terms of the GNU General Public
6 : * License v2 as published by the Free Software Foundation.
7 : *
8 : * This program is distributed in the hope that it will be useful,
9 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 : * General Public License for more details.
12 : *
13 : * You should have received a copy of the GNU General Public
14 : * License along with this program; if not, write to the
15 : * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 : * Boston, MA 02111-1307, USA.
17 : */
18 :
19 : #include <linux/sched.h>
20 : #include <linux/pagemap.h>
21 : #include <linux/writeback.h>
22 : #include <linux/blkdev.h>
23 : #include <linux/rbtree.h>
24 : #include <linux/slab.h>
25 : #include <linux/workqueue.h>
26 : #include <linux/btrfs.h>
27 :
28 : #include "ctree.h"
29 : #include "transaction.h"
30 : #include "disk-io.h"
31 : #include "locking.h"
32 : #include "ulist.h"
33 : #include "backref.h"
34 : #include "extent_io.h"
35 : #include "qgroup.h"
36 :
37 : /* TODO XXX FIXME
38 : * - subvol delete -> delete when ref goes to 0? delete limits also?
39 : * - reorganize keys
40 : * - compressed
41 : * - sync
42 : * - copy also limits on subvol creation
43 : * - limit
44 : * - caches for ulists
45 : * - performance benchmarks
46 : * - check all ioctl parameters
47 : */
48 :
49 : /*
50 : * one struct for each qgroup, organized in fs_info->qgroup_tree.
51 : */
52 : struct btrfs_qgroup {
53 : u64 qgroupid;
54 :
55 : /*
56 : * state
57 : */
58 : u64 rfer; /* referenced */
59 : u64 rfer_cmpr; /* referenced compressed */
60 : u64 excl; /* exclusive */
61 : u64 excl_cmpr; /* exclusive compressed */
62 :
63 : /*
64 : * limits
65 : */
66 : u64 lim_flags; /* which limits are set */
67 : u64 max_rfer;
68 : u64 max_excl;
69 : u64 rsv_rfer;
70 : u64 rsv_excl;
71 :
72 : /*
73 : * reservation tracking
74 : */
75 : u64 reserved;
76 :
77 : /*
78 : * lists
79 : */
80 : struct list_head groups; /* groups this group is member of */
81 : struct list_head members; /* groups that are members of this group */
82 : struct list_head dirty; /* dirty groups */
83 : struct rb_node node; /* tree of qgroups */
84 :
85 : /*
86 : * temp variables for accounting operations
87 : */
88 : u64 old_refcnt;
89 : u64 new_refcnt;
90 : };
91 :
92 : /*
93 : * glue structure to represent the relations between qgroups.
94 : */
95 : struct btrfs_qgroup_list {
96 : struct list_head next_group;
97 : struct list_head next_member;
98 : struct btrfs_qgroup *group;
99 : struct btrfs_qgroup *member;
100 : };
101 :
102 : #define ptr_to_u64(x) ((u64)(uintptr_t)(x))
103 : #define u64_to_ptr(x) ((struct btrfs_qgroup *)(uintptr_t)(x))
104 :
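/*
 * Aside (illustrative, not part of the kernel source): the two macros
 * above exist because ulist stores an opaque u64 aux value per node,
 * and this file stashes btrfs_qgroup pointers there.  The glue
 * structure above them puts each relation node on two lists at once --
 * the member's ->groups list and the parent's ->members list -- so a
 * single allocation is one edge of the qgroup DAG, walkable from
 * either end.  Below is a minimal userspace model of that pattern,
 * with hypothetical names and plain pointers instead of the kernel's
 * list_head (cleanup omitted):
 */
#include <stdio.h>
#include <stdlib.h>

struct qgroup;

struct relation {                      /* models btrfs_qgroup_list */
        struct relation *next_group;   /* next parent of ->member */
        struct relation *next_member;  /* next child of ->group */
        struct qgroup *group;          /* parent end of the edge */
        struct qgroup *member;         /* child end of the edge */
};

struct qgroup {
        unsigned long long qgroupid;
        struct relation *groups;       /* edges where we are the member */
        struct relation *members;      /* edges where we are the group */
};

static void add_relation(struct qgroup *member, struct qgroup *parent)
{
        struct relation *rel = calloc(1, sizeof(*rel));

        rel->group = parent;
        rel->member = member;
        rel->next_group = member->groups;      /* hook into both lists */
        member->groups = rel;
        rel->next_member = parent->members;
        parent->members = rel;
}

int main(void)
{
        struct qgroup child = { .qgroupid = 257 };
        struct qgroup p1 = { .qgroupid = (1ULL << 48) };     /* "1/0"-style id */
        struct qgroup p2 = { .qgroupid = (1ULL << 48) + 1 };

        add_relation(&child, &p1);
        add_relation(&child, &p2);

        /* the upward walk the accounting code performs via ->groups */
        for (struct relation *rel = child.groups; rel; rel = rel->next_group)
                printf("parent qgroup id %llu\n", rel->group->qgroupid);
        return 0;
}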
105 : static int
106 : qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
107 : int init_flags);
108 : static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info);
109 :
110 : /* must be called with qgroup_ioctl_lock held */
111 : static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info,
112 : u64 qgroupid)
113 : {
114 : struct rb_node *n = fs_info->qgroup_tree.rb_node;
115 : struct btrfs_qgroup *qgroup;
116 :
117 75943 : while (n) {
118 75783 : qgroup = rb_entry(n, struct btrfs_qgroup, node);
119 75783 : if (qgroup->qgroupid < qgroupid)
120 18208 : n = n->rb_left;
121 57575 : else if (qgroup->qgroupid > qgroupid)
122 16142 : n = n->rb_right;
123 : else
124 : return qgroup;
125 : }
126 : return NULL;
127 : }
128 :
129 : /* must be called with qgroup_lock held */
130 39 : static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info,
131 : u64 qgroupid)
132 : {
133 39 : struct rb_node **p = &fs_info->qgroup_tree.rb_node;
134 : struct rb_node *parent = NULL;
135 : struct btrfs_qgroup *qgroup;
136 :
137 160 : while (*p) {
138 : parent = *p;
139 82 : qgroup = rb_entry(parent, struct btrfs_qgroup, node);
140 :
141 82 : if (qgroup->qgroupid < qgroupid)
142 67 : p = &(*p)->rb_left;
143 15 : else if (qgroup->qgroupid > qgroupid)
144 15 : p = &(*p)->rb_right;
145 : else
146 : return qgroup;
147 : }
148 :
149 39 : qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC);
150 39 : if (!qgroup)
151 : return ERR_PTR(-ENOMEM);
152 :
153 39 : qgroup->qgroupid = qgroupid;
154 39 : INIT_LIST_HEAD(&qgroup->groups);
155 39 : INIT_LIST_HEAD(&qgroup->members);
156 39 : INIT_LIST_HEAD(&qgroup->dirty);
157 :
158 39 : rb_link_node(&qgroup->node, parent, p);
159 39 : rb_insert_color(&qgroup->node, &fs_info->qgroup_tree);
160 :
161 39 : return qgroup;
162 : }
163 :
164 39 : static void __del_qgroup_rb(struct btrfs_qgroup *qgroup)
165 : {
166 : struct btrfs_qgroup_list *list;
167 :
168 39 : list_del(&qgroup->dirty);
169 117 : while (!list_empty(&qgroup->groups)) {
170 : list = list_first_entry(&qgroup->groups,
171 : struct btrfs_qgroup_list, next_group);
172 0 : list_del(&list->next_group);
173 0 : list_del(&list->next_member);
174 0 : kfree(list);
175 : }
176 :
177 118 : while (!list_empty(&qgroup->members)) {
178 20 : list = list_first_entry(&qgroup->members,
179 : struct btrfs_qgroup_list, next_member);
180 20 : list_del(&list->next_group);
181 20 : list_del(&list->next_member);
182 20 : kfree(list);
183 : }
184 39 : kfree(qgroup);
185 39 : }
186 :
187 : /* must be called with qgroup_lock held */
188 0 : static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
189 : {
190 : struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid);
191 :
192 0 : if (!qgroup)
193 : return -ENOENT;
194 :
195 0 : rb_erase(&qgroup->node, &fs_info->qgroup_tree);
196 0 : __del_qgroup_rb(qgroup);
197 0 : return 0;
198 : }
199 :
200 : /* must be called with qgroup_lock held */
201 20 : static int add_relation_rb(struct btrfs_fs_info *fs_info,
202 : u64 memberid, u64 parentid)
203 : {
204 : struct btrfs_qgroup *member;
205 : struct btrfs_qgroup *parent;
206 : struct btrfs_qgroup_list *list;
207 :
208 : member = find_qgroup_rb(fs_info, memberid);
209 : parent = find_qgroup_rb(fs_info, parentid);
210 20 : if (!member || !parent)
211 : return -ENOENT;
212 :
213 20 : list = kzalloc(sizeof(*list), GFP_ATOMIC);
214 20 : if (!list)
215 : return -ENOMEM;
216 :
217 20 : list->group = parent;
218 20 : list->member = member;
219 20 : list_add_tail(&list->next_group, &member->groups);
220 20 : list_add_tail(&list->next_member, &parent->members);
221 :
222 20 : return 0;
223 : }
224 :
225 : /* must be called with qgroup_lock held */
226 0 : static int del_relation_rb(struct btrfs_fs_info *fs_info,
227 : u64 memberid, u64 parentid)
228 : {
229 : struct btrfs_qgroup *member;
230 : struct btrfs_qgroup *parent;
231 : struct btrfs_qgroup_list *list;
232 :
233 : member = find_qgroup_rb(fs_info, memberid);
234 : parent = find_qgroup_rb(fs_info, parentid);
235 0 : if (!member || !parent)
236 : return -ENOENT;
237 :
238 0 : list_for_each_entry(list, &member->groups, next_group) {
239 0 : if (list->group == parent) {
240 0 : list_del(&list->next_group);
241 0 : list_del(&list->next_member);
242 0 : kfree(list);
243 0 : return 0;
244 : }
245 : }
246 : return -ENOENT;
247 : }
248 :
249 : #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
250 : int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
251 : u64 rfer, u64 excl)
252 : {
253 : struct btrfs_qgroup *qgroup;
254 :
255 : qgroup = find_qgroup_rb(fs_info, qgroupid);
256 : if (!qgroup)
257 : return -EINVAL;
258 : if (qgroup->rfer != rfer || qgroup->excl != excl)
259 : return -EINVAL;
260 : return 0;
261 : }
262 : #endif
263 :
264 : /*
265 : * The full config is read in one go, only called from open_ctree()
266 : * It doesn't use any locking, as at this point we're still single-threaded
267 : */
268 253 : int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
269 : {
270 : struct btrfs_key key;
271 : struct btrfs_key found_key;
272 221 : struct btrfs_root *quota_root = fs_info->quota_root;
273 : struct btrfs_path *path = NULL;
274 : struct extent_buffer *l;
275 : int slot;
276 : int ret = 0;
277 : u64 flags = 0;
278 : u64 rescan_progress = 0;
279 :
280 221 : if (!fs_info->quota_enabled)
281 : return 0;
282 :
283 3 : fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS);
284 3 : if (!fs_info->qgroup_ulist) {
285 : ret = -ENOMEM;
286 : goto out;
287 : }
288 :
289 3 : path = btrfs_alloc_path();
290 3 : if (!path) {
291 : ret = -ENOMEM;
292 : goto out;
293 : }
294 :
295 : /* default this to quota off, in case no status key is found */
296 3 : fs_info->qgroup_flags = 0;
297 :
298 : /*
299 : * pass 1: read status, all qgroup infos and limits
300 : */
301 3 : key.objectid = 0;
302 3 : key.type = 0;
303 3 : key.offset = 0;
304 3 : ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1);
305 3 : if (ret)
306 : goto out;
307 :
308 : while (1) {
309 : struct btrfs_qgroup *qgroup;
310 :
311 55 : slot = path->slots[0];
312 55 : l = path->nodes[0];
313 55 : btrfs_item_key_to_cpu(l, &found_key, slot);
314 :
315 55 : if (found_key.type == BTRFS_QGROUP_STATUS_KEY) {
316 : struct btrfs_qgroup_status_item *ptr;
317 :
318 3 : ptr = btrfs_item_ptr(l, slot,
319 : struct btrfs_qgroup_status_item);
320 :
321 3 : if (btrfs_qgroup_status_version(l, ptr) !=
322 : BTRFS_QGROUP_STATUS_VERSION) {
323 0 : btrfs_err(fs_info,
324 : "old qgroup version, quota disabled");
325 0 : goto out;
326 : }
327 3 : if (btrfs_qgroup_status_generation(l, ptr) !=
328 3 : fs_info->generation) {
329 : flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
330 0 : btrfs_err(fs_info,
331 : "qgroup generation mismatch, "
332 : "marked as inconsistent");
333 : }
334 3 : fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
335 : ptr);
336 : rescan_progress = btrfs_qgroup_status_rescan(l, ptr);
337 3 : goto next1;
338 : }
339 :
340 52 : if (found_key.type != BTRFS_QGROUP_INFO_KEY &&
341 : found_key.type != BTRFS_QGROUP_LIMIT_KEY)
342 : goto next1;
343 :
344 32 : qgroup = find_qgroup_rb(fs_info, found_key.offset);
345 32 : if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) ||
346 16 : (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) {
347 0 : btrfs_err(fs_info, "inconsistent qgroup config");
348 : flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
349 : }
350 32 : if (!qgroup) {
351 16 : qgroup = add_qgroup_rb(fs_info, found_key.offset);
352 16 : if (IS_ERR(qgroup)) {
353 0 : ret = PTR_ERR(qgroup);
354 0 : goto out;
355 : }
356 : }
357 32 : switch (found_key.type) {
358 : case BTRFS_QGROUP_INFO_KEY: {
359 : struct btrfs_qgroup_info_item *ptr;
360 :
361 16 : ptr = btrfs_item_ptr(l, slot,
362 : struct btrfs_qgroup_info_item);
363 16 : qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr);
364 16 : qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr);
365 16 : qgroup->excl = btrfs_qgroup_info_excl(l, ptr);
366 16 : qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr);
367 : /* generation currently unused */
368 16 : break;
369 : }
370 : case BTRFS_QGROUP_LIMIT_KEY: {
371 : struct btrfs_qgroup_limit_item *ptr;
372 :
373 16 : ptr = btrfs_item_ptr(l, slot,
374 : struct btrfs_qgroup_limit_item);
375 16 : qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr);
376 16 : qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr);
377 16 : qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr);
378 16 : qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr);
379 16 : qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr);
380 16 : break;
381 : }
382 : }
383 : next1:
384 : ret = btrfs_next_item(quota_root, path);
385 55 : if (ret < 0)
386 : goto out;
387 55 : if (ret)
388 : break;
389 : }
390 3 : btrfs_release_path(path);
391 :
392 : /*
393 : * pass 2: read all qgroup relations
394 : */
395 3 : key.objectid = 0;
396 3 : key.type = BTRFS_QGROUP_RELATION_KEY;
397 3 : key.offset = 0;
398 3 : ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0);
399 3 : if (ret)
400 : goto out;
401 : while (1) {
402 20 : slot = path->slots[0];
403 20 : l = path->nodes[0];
404 20 : btrfs_item_key_to_cpu(l, &found_key, slot);
405 :
406 20 : if (found_key.type != BTRFS_QGROUP_RELATION_KEY)
407 : goto next2;
408 :
409 20 : if (found_key.objectid > found_key.offset) {
410 : /* parent <- member, not needed to build config */
411 : /* FIXME should we omit the key completely? */
412 : goto next2;
413 : }
414 :
415 10 : ret = add_relation_rb(fs_info, found_key.objectid,
416 : found_key.offset);
417 10 : if (ret == -ENOENT) {
418 0 : btrfs_warn(fs_info,
419 : "orphan qgroup relation 0x%llx->0x%llx",
420 : found_key.objectid, found_key.offset);
421 : ret = 0; /* ignore the error */
422 : }
423 10 : if (ret)
424 : goto out;
425 : next2:
426 : ret = btrfs_next_item(quota_root, path);
427 20 : if (ret < 0)
428 : goto out;
429 20 : if (ret)
430 : break;
431 : }
432 : out:
433 3 : fs_info->qgroup_flags |= flags;
434 3 : if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) {
435 0 : fs_info->quota_enabled = 0;
436 0 : fs_info->pending_quota_state = 0;
437 3 : } else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN &&
438 : ret >= 0) {
439 0 : ret = qgroup_rescan_init(fs_info, rescan_progress, 0);
440 : }
441 3 : btrfs_free_path(path);
442 :
443 3 : if (ret < 0) {
444 0 : ulist_free(fs_info->qgroup_ulist);
445 0 : fs_info->qgroup_ulist = NULL;
446 0 : fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
447 : }
448 :
449 3 : return ret < 0 ? ret : 0;
450 : }
451 :
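/*
 * Aside: the two passes above read the following quota-tree items,
 * which the helpers later in this file create and update.  Layout as
 * implied by the keys used in this file, shown for orientation only:
 *
 *   (0,   BTRFS_QGROUP_STATUS_KEY,   0)        one per filesystem
 *   (0,   BTRFS_QGROUP_INFO_KEY,     qgroupid) usage counters
 *   (0,   BTRFS_QGROUP_LIMIT_KEY,    qgroupid) limits
 *   (src, BTRFS_QGROUP_RELATION_KEY, dst)      relation, stored in
 *   (dst, BTRFS_QGROUP_RELATION_KEY, src)      both directions
 *
 * Pass 2 only acts on relation items with objectid <= offset, which is
 * why the mirrored copies are skipped above.
 */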
452 : /*
453 : * This is called from close_ctree() or open_ctree() or btrfs_quota_disable(),
454 : * the first two of which are single-threaded paths. For the third,
455 : * quota_root has already been set to NULL with qgroup_lock held, so it is
456 : * safe to clean up the in-memory structures without holding qgroup_lock.
457 : */
458 221 : void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
459 : {
460 : struct rb_node *n;
461 : struct btrfs_qgroup *qgroup;
462 :
463 481 : while ((n = rb_first(&fs_info->qgroup_tree))) {
464 39 : qgroup = rb_entry(n, struct btrfs_qgroup, node);
465 39 : rb_erase(n, &fs_info->qgroup_tree);
466 39 : __del_qgroup_rb(qgroup);
467 : }
468 : /*
469 : * we call btrfs_free_qgroup_config() when unmounting the
470 : * filesystem and when disabling quota, so we set qgroup_ulist
471 : * to NULL here to avoid a double free.
472 : */
473 221 : ulist_free(fs_info->qgroup_ulist);
474 221 : fs_info->qgroup_ulist = NULL;
475 221 : }
476 :
477 20 : static int add_qgroup_relation_item(struct btrfs_trans_handle *trans,
478 : struct btrfs_root *quota_root,
479 : u64 src, u64 dst)
480 : {
481 : int ret;
482 : struct btrfs_path *path;
483 : struct btrfs_key key;
484 :
485 20 : path = btrfs_alloc_path();
486 20 : if (!path)
487 : return -ENOMEM;
488 :
489 20 : key.objectid = src;
490 20 : key.type = BTRFS_QGROUP_RELATION_KEY;
491 20 : key.offset = dst;
492 :
493 : ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0);
494 :
495 20 : btrfs_mark_buffer_dirty(path->nodes[0]);
496 :
497 20 : btrfs_free_path(path);
498 20 : return ret;
499 : }
500 :
501 0 : static int del_qgroup_relation_item(struct btrfs_trans_handle *trans,
502 : struct btrfs_root *quota_root,
503 : u64 src, u64 dst)
504 : {
505 : int ret;
506 : struct btrfs_path *path;
507 : struct btrfs_key key;
508 :
509 0 : path = btrfs_alloc_path();
510 0 : if (!path)
511 : return -ENOMEM;
512 :
513 0 : key.objectid = src;
514 0 : key.type = BTRFS_QGROUP_RELATION_KEY;
515 0 : key.offset = dst;
516 :
517 0 : ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
518 0 : if (ret < 0)
519 : goto out;
520 :
521 0 : if (ret > 0) {
522 : ret = -ENOENT;
523 : goto out;
524 : }
525 :
526 : ret = btrfs_del_item(trans, quota_root, path);
527 : out:
528 0 : btrfs_free_path(path);
529 0 : return ret;
530 : }
531 :
532 23 : static int add_qgroup_item(struct btrfs_trans_handle *trans,
533 : struct btrfs_root *quota_root, u64 qgroupid)
534 : {
535 : int ret;
536 : struct btrfs_path *path;
537 : struct btrfs_qgroup_info_item *qgroup_info;
538 : struct btrfs_qgroup_limit_item *qgroup_limit;
539 : struct extent_buffer *leaf;
540 : struct btrfs_key key;
541 :
542 : #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
543 : if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, "a_root->state)))
544 : return 0;
545 : #endif
546 23 : path = btrfs_alloc_path();
547 23 : if (!path)
548 : return -ENOMEM;
549 :
550 23 : key.objectid = 0;
551 23 : key.type = BTRFS_QGROUP_INFO_KEY;
552 23 : key.offset = qgroupid;
553 :
554 : ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
555 : sizeof(*qgroup_info));
556 23 : if (ret)
557 : goto out;
558 :
559 23 : leaf = path->nodes[0];
560 46 : qgroup_info = btrfs_item_ptr(leaf, path->slots[0],
561 : struct btrfs_qgroup_info_item);
562 23 : btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid);
563 : btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0);
564 : btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0);
565 : btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0);
566 : btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0);
567 :
568 23 : btrfs_mark_buffer_dirty(leaf);
569 :
570 23 : btrfs_release_path(path);
571 :
572 23 : key.type = BTRFS_QGROUP_LIMIT_KEY;
573 : ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
574 : sizeof(*qgroup_limit));
575 23 : if (ret)
576 : goto out;
577 :
578 23 : leaf = path->nodes[0];
579 46 : qgroup_limit = btrfs_item_ptr(leaf, path->slots[0],
580 : struct btrfs_qgroup_limit_item);
581 : btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0);
582 : btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0);
583 : btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0);
584 : btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0);
585 : btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0);
586 :
587 23 : btrfs_mark_buffer_dirty(leaf);
588 :
589 : ret = 0;
590 : out:
591 23 : btrfs_free_path(path);
592 23 : return ret;
593 : }
594 :
595 0 : static int del_qgroup_item(struct btrfs_trans_handle *trans,
596 : struct btrfs_root *quota_root, u64 qgroupid)
597 : {
598 : int ret;
599 : struct btrfs_path *path;
600 : struct btrfs_key key;
601 :
602 0 : path = btrfs_alloc_path();
603 0 : if (!path)
604 : return -ENOMEM;
605 :
606 0 : key.objectid = 0;
607 0 : key.type = BTRFS_QGROUP_INFO_KEY;
608 0 : key.offset = qgroupid;
609 0 : ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
610 0 : if (ret < 0)
611 : goto out;
612 :
613 0 : if (ret > 0) {
614 : ret = -ENOENT;
615 : goto out;
616 : }
617 :
618 : ret = btrfs_del_item(trans, quota_root, path);
619 0 : if (ret)
620 : goto out;
621 :
622 0 : btrfs_release_path(path);
623 :
624 0 : key.type = BTRFS_QGROUP_LIMIT_KEY;
625 0 : ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
626 0 : if (ret < 0)
627 : goto out;
628 :
629 0 : if (ret > 0) {
630 : ret = -ENOENT;
631 : goto out;
632 : }
633 :
634 : ret = btrfs_del_item(trans, quota_root, path);
635 :
636 : out:
637 0 : btrfs_free_path(path);
638 0 : return ret;
639 : }
640 :
641 3 : static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
642 : struct btrfs_root *root, u64 qgroupid,
643 : u64 flags, u64 max_rfer, u64 max_excl,
644 : u64 rsv_rfer, u64 rsv_excl)
645 : {
646 : struct btrfs_path *path;
647 : struct btrfs_key key;
648 : struct extent_buffer *l;
649 : struct btrfs_qgroup_limit_item *qgroup_limit;
650 : int ret;
651 : int slot;
652 :
653 3 : key.objectid = 0;
654 3 : key.type = BTRFS_QGROUP_LIMIT_KEY;
655 3 : key.offset = qgroupid;
656 :
657 3 : path = btrfs_alloc_path();
658 3 : if (!path)
659 : return -ENOMEM;
660 :
661 3 : ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
662 3 : if (ret > 0)
663 : ret = -ENOENT;
664 :
665 3 : if (ret)
666 : goto out;
667 :
668 3 : l = path->nodes[0];
669 3 : slot = path->slots[0];
670 3 : qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item);
671 : btrfs_set_qgroup_limit_flags(l, qgroup_limit, flags);
672 : btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, max_rfer);
673 : btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, max_excl);
674 : btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, rsv_rfer);
675 : btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, rsv_excl);
676 :
677 3 : btrfs_mark_buffer_dirty(l);
678 :
679 : out:
680 3 : btrfs_free_path(path);
681 3 : return ret;
682 : }
683 :
684 150 : static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
685 : struct btrfs_root *root,
686 : struct btrfs_qgroup *qgroup)
687 : {
688 : struct btrfs_path *path;
689 : struct btrfs_key key;
690 : struct extent_buffer *l;
691 : struct btrfs_qgroup_info_item *qgroup_info;
692 : int ret;
693 : int slot;
694 :
695 : #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
696 : if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state)))
697 : return 0;
698 : #endif
699 150 : key.objectid = 0;
700 150 : key.type = BTRFS_QGROUP_INFO_KEY;
701 150 : key.offset = qgroup->qgroupid;
702 :
703 150 : path = btrfs_alloc_path();
704 150 : if (!path)
705 : return -ENOMEM;
706 :
707 150 : ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
708 150 : if (ret > 0)
709 : ret = -ENOENT;
710 :
711 150 : if (ret)
712 : goto out;
713 :
714 150 : l = path->nodes[0];
715 150 : slot = path->slots[0];
716 150 : qgroup_info = btrfs_item_ptr(l, slot, struct btrfs_qgroup_info_item);
717 150 : btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid);
718 150 : btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer);
719 150 : btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr);
720 150 : btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl);
721 150 : btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr);
722 :
723 150 : btrfs_mark_buffer_dirty(l);
724 :
725 : out:
726 150 : btrfs_free_path(path);
727 150 : return ret;
728 : }
729 :
730 127 : static int update_qgroup_status_item(struct btrfs_trans_handle *trans,
731 : struct btrfs_fs_info *fs_info,
732 : struct btrfs_root *root)
733 : {
734 : struct btrfs_path *path;
735 : struct btrfs_key key;
736 : struct extent_buffer *l;
737 : struct btrfs_qgroup_status_item *ptr;
738 : int ret;
739 : int slot;
740 :
741 127 : key.objectid = 0;
742 127 : key.type = BTRFS_QGROUP_STATUS_KEY;
743 127 : key.offset = 0;
744 :
745 127 : path = btrfs_alloc_path();
746 127 : if (!path)
747 : return -ENOMEM;
748 :
749 127 : ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
750 127 : if (ret > 0)
751 : ret = -ENOENT;
752 :
753 127 : if (ret)
754 : goto out;
755 :
756 127 : l = path->nodes[0];
757 127 : slot = path->slots[0];
758 127 : ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
759 127 : btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
760 127 : btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
761 127 : btrfs_set_qgroup_status_rescan(l, ptr,
762 : fs_info->qgroup_rescan_progress.objectid);
763 :
764 127 : btrfs_mark_buffer_dirty(l);
765 :
766 : out:
767 127 : btrfs_free_path(path);
768 : return ret;
769 : }
770 :
771 : /*
772 : * called with qgroup_lock held
773 : */
774 0 : static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans,
775 : struct btrfs_root *root)
776 : {
777 : struct btrfs_path *path;
778 : struct btrfs_key key;
779 0 : struct extent_buffer *leaf = NULL;
780 : int ret;
781 : int nr = 0;
782 :
783 0 : path = btrfs_alloc_path();
784 0 : if (!path)
785 : return -ENOMEM;
786 :
787 0 : path->leave_spinning = 1;
788 :
789 0 : key.objectid = 0;
790 0 : key.offset = 0;
791 0 : key.type = 0;
792 :
793 : while (1) {
794 0 : ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
795 0 : if (ret < 0)
796 : goto out;
797 0 : leaf = path->nodes[0];
798 0 : nr = btrfs_header_nritems(leaf);
799 0 : if (!nr)
800 : break;
801 : /*
802 : * delete the leaves one by one
803 : * since the whole tree is going
804 : * to be deleted.
805 : */
806 0 : path->slots[0] = 0;
807 0 : ret = btrfs_del_items(trans, root, path, 0, nr);
808 0 : if (ret)
809 : goto out;
810 :
811 0 : btrfs_release_path(path);
812 0 : }
813 : ret = 0;
814 : out:
815 0 : root->fs_info->pending_quota_state = 0;
816 0 : btrfs_free_path(path);
817 0 : return ret;
818 : }
819 :
820 6 : int btrfs_quota_enable(struct btrfs_trans_handle *trans,
821 : struct btrfs_fs_info *fs_info)
822 : {
823 : struct btrfs_root *quota_root;
824 6 : struct btrfs_root *tree_root = fs_info->tree_root;
825 : struct btrfs_path *path = NULL;
826 : struct btrfs_qgroup_status_item *ptr;
827 : struct extent_buffer *leaf;
828 : struct btrfs_key key;
829 : struct btrfs_key found_key;
830 : struct btrfs_qgroup *qgroup = NULL;
831 : int ret = 0;
832 : int slot;
833 :
834 6 : mutex_lock(&fs_info->qgroup_ioctl_lock);
835 6 : if (fs_info->quota_root) {
836 0 : fs_info->pending_quota_state = 1;
837 0 : goto out;
838 : }
839 :
840 6 : fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS);
841 6 : if (!fs_info->qgroup_ulist) {
842 : ret = -ENOMEM;
843 : goto out;
844 : }
845 :
846 : /*
847 : * initially create the quota tree
848 : */
849 6 : quota_root = btrfs_create_tree(trans, fs_info,
850 : BTRFS_QUOTA_TREE_OBJECTID);
851 6 : if (IS_ERR(quota_root)) {
852 0 : ret = PTR_ERR(quota_root);
853 0 : goto out;
854 : }
855 :
856 6 : path = btrfs_alloc_path();
857 6 : if (!path) {
858 : ret = -ENOMEM;
859 : goto out_free_root;
860 : }
861 :
862 6 : key.objectid = 0;
863 6 : key.type = BTRFS_QGROUP_STATUS_KEY;
864 6 : key.offset = 0;
865 :
866 : ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
867 : sizeof(*ptr));
868 6 : if (ret)
869 : goto out_free_path;
870 :
871 6 : leaf = path->nodes[0];
872 12 : ptr = btrfs_item_ptr(leaf, path->slots[0],
873 : struct btrfs_qgroup_status_item);
874 6 : btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid);
875 : btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION);
876 6 : fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
877 : BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
878 : btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
879 : btrfs_set_qgroup_status_rescan(leaf, ptr, 0);
880 :
881 6 : btrfs_mark_buffer_dirty(leaf);
882 :
883 6 : key.objectid = 0;
884 6 : key.type = BTRFS_ROOT_REF_KEY;
885 6 : key.offset = 0;
886 :
887 6 : btrfs_release_path(path);
888 6 : ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0);
889 6 : if (ret > 0)
890 : goto out_add_root;
891 6 : if (ret < 0)
892 : goto out_free_path;
893 :
894 :
895 : while (1) {
896 102 : slot = path->slots[0];
897 102 : leaf = path->nodes[0];
898 102 : btrfs_item_key_to_cpu(leaf, &found_key, slot);
899 :
900 102 : if (found_key.type == BTRFS_ROOT_REF_KEY) {
901 5 : ret = add_qgroup_item(trans, quota_root,
902 : found_key.offset);
903 5 : if (ret)
904 : goto out_free_path;
905 :
906 5 : qgroup = add_qgroup_rb(fs_info, found_key.offset);
907 5 : if (IS_ERR(qgroup)) {
908 0 : ret = PTR_ERR(qgroup);
909 0 : goto out_free_path;
910 : }
911 : }
912 : ret = btrfs_next_item(tree_root, path);
913 102 : if (ret < 0)
914 : goto out_free_path;
915 102 : if (ret)
916 : break;
917 : }
918 :
919 : out_add_root:
920 6 : btrfs_release_path(path);
921 6 : ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID);
922 6 : if (ret)
923 : goto out_free_path;
924 :
925 6 : qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID);
926 6 : if (IS_ERR(qgroup)) {
927 0 : ret = PTR_ERR(qgroup);
928 0 : goto out_free_path;
929 : }
930 : spin_lock(&fs_info->qgroup_lock);
931 6 : fs_info->quota_root = quota_root;
932 6 : fs_info->pending_quota_state = 1;
933 : spin_unlock(&fs_info->qgroup_lock);
934 : out_free_path:
935 6 : btrfs_free_path(path);
936 : out_free_root:
937 6 : if (ret) {
938 0 : free_extent_buffer(quota_root->node);
939 0 : free_extent_buffer(quota_root->commit_root);
940 0 : kfree(quota_root);
941 : }
942 : out:
943 6 : if (ret) {
944 0 : ulist_free(fs_info->qgroup_ulist);
945 0 : fs_info->qgroup_ulist = NULL;
946 : }
947 6 : mutex_unlock(&fs_info->qgroup_ioctl_lock);
948 6 : return ret;
949 : }
950 :
951 0 : int btrfs_quota_disable(struct btrfs_trans_handle *trans,
952 : struct btrfs_fs_info *fs_info)
953 : {
954 0 : struct btrfs_root *tree_root = fs_info->tree_root;
955 : struct btrfs_root *quota_root;
956 : int ret = 0;
957 :
958 0 : mutex_lock(&fs_info->qgroup_ioctl_lock);
959 0 : if (!fs_info->quota_root)
960 : goto out;
961 : spin_lock(&fs_info->qgroup_lock);
962 0 : fs_info->quota_enabled = 0;
963 0 : fs_info->pending_quota_state = 0;
964 0 : quota_root = fs_info->quota_root;
965 0 : fs_info->quota_root = NULL;
966 : spin_unlock(&fs_info->qgroup_lock);
967 :
968 0 : btrfs_free_qgroup_config(fs_info);
969 :
970 0 : ret = btrfs_clean_quota_tree(trans, quota_root);
971 0 : if (ret)
972 : goto out;
973 :
974 0 : ret = btrfs_del_root(trans, tree_root, &quota_root->root_key);
975 0 : if (ret)
976 : goto out;
977 :
978 0 : list_del(&quota_root->dirty_list);
979 :
980 0 : btrfs_tree_lock(quota_root->node);
981 0 : clean_tree_block(trans, tree_root, quota_root->node);
982 0 : btrfs_tree_unlock(quota_root->node);
983 0 : btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);
984 :
985 0 : free_extent_buffer(quota_root->node);
986 0 : free_extent_buffer(quota_root->commit_root);
987 0 : kfree(quota_root);
988 : out:
989 0 : mutex_unlock(&fs_info->qgroup_ioctl_lock);
990 0 : return ret;
991 : }
992 :
993 : static void qgroup_dirty(struct btrfs_fs_info *fs_info,
994 : struct btrfs_qgroup *qgroup)
995 : {
996 41692 : if (list_empty(&qgroup->dirty))
997 150 : list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
998 : }
999 :
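/*
 * Aside: qgroup_dirty() queues a qgroup on fs_info->dirty_qgroups the
 * first time its counters change; the accumulated counters are written
 * back to the quota tree later (update_qgroup_info_item() above does
 * the actual item update when that list is drained).
 */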
1000 0 : int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
1001 0 : struct btrfs_fs_info *fs_info, u64 src, u64 dst)
1002 : {
1003 : struct btrfs_root *quota_root;
1004 : struct btrfs_qgroup *parent;
1005 : struct btrfs_qgroup *member;
1006 : struct btrfs_qgroup_list *list;
1007 : int ret = 0;
1008 :
1009 0 : mutex_lock(&fs_info->qgroup_ioctl_lock);
1010 0 : quota_root = fs_info->quota_root;
1011 0 : if (!quota_root) {
1012 : ret = -EINVAL;
1013 : goto out;
1014 : }
1015 : member = find_qgroup_rb(fs_info, src);
1016 : parent = find_qgroup_rb(fs_info, dst);
1017 0 : if (!member || !parent) {
1018 : ret = -EINVAL;
1019 : goto out;
1020 : }
1021 :
1022 : /* first check whether such a qgroup relation already exists */
1023 0 : list_for_each_entry(list, &member->groups, next_group) {
1024 0 : if (list->group == parent) {
1025 : ret = -EEXIST;
1026 : goto out;
1027 : }
1028 : }
1029 :
1030 0 : ret = add_qgroup_relation_item(trans, quota_root, src, dst);
1031 0 : if (ret)
1032 : goto out;
1033 :
1034 0 : ret = add_qgroup_relation_item(trans, quota_root, dst, src);
1035 0 : if (ret) {
1036 0 : del_qgroup_relation_item(trans, quota_root, src, dst);
1037 0 : goto out;
1038 : }
1039 :
1040 : spin_lock(&fs_info->qgroup_lock);
1041 0 : ret = add_relation_rb(quota_root->fs_info, src, dst);
1042 : spin_unlock(&fs_info->qgroup_lock);
1043 : out:
1044 0 : mutex_unlock(&fs_info->qgroup_ioctl_lock);
1045 0 : return ret;
1046 : }
1047 :
1048 0 : int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
1049 0 : struct btrfs_fs_info *fs_info, u64 src, u64 dst)
1050 : {
1051 : struct btrfs_root *quota_root;
1052 : struct btrfs_qgroup *parent;
1053 : struct btrfs_qgroup *member;
1054 : struct btrfs_qgroup_list *list;
1055 : int ret = 0;
1056 : int err;
1057 :
1058 0 : mutex_lock(&fs_info->qgroup_ioctl_lock);
1059 0 : quota_root = fs_info->quota_root;
1060 0 : if (!quota_root) {
1061 : ret = -EINVAL;
1062 : goto out;
1063 : }
1064 :
1065 : member = find_qgroup_rb(fs_info, src);
1066 : parent = find_qgroup_rb(fs_info, dst);
1067 0 : if (!member || !parent) {
1068 : ret = -EINVAL;
1069 : goto out;
1070 : }
1071 :
1072 : /* check if such qgroup relation exist firstly */
1073 0 : list_for_each_entry(list, &member->groups, next_group) {
1074 0 : if (list->group == parent)
1075 : goto exist;
1076 : }
1077 : ret = -ENOENT;
1078 : goto out;
1079 : exist:
1080 0 : ret = del_qgroup_relation_item(trans, quota_root, src, dst);
1081 0 : err = del_qgroup_relation_item(trans, quota_root, dst, src);
1082 0 : if (err && !ret)
1083 : ret = err;
1084 :
1085 : spin_lock(&fs_info->qgroup_lock);
1086 0 : del_relation_rb(fs_info, src, dst);
1087 : spin_unlock(&fs_info->qgroup_lock);
1088 : out:
1089 0 : mutex_unlock(&fs_info->qgroup_ioctl_lock);
1090 0 : return ret;
1091 : }
1092 :
1093 1 : int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
1094 1 : struct btrfs_fs_info *fs_info, u64 qgroupid, char *name)
1095 : {
1096 : struct btrfs_root *quota_root;
1097 : struct btrfs_qgroup *qgroup;
1098 : int ret = 0;
1099 :
1100 1 : mutex_lock(&fs_info->qgroup_ioctl_lock);
1101 1 : quota_root = fs_info->quota_root;
1102 1 : if (!quota_root) {
1103 : ret = -EINVAL;
1104 : goto out;
1105 : }
1106 : qgroup = find_qgroup_rb(fs_info, qgroupid);
1107 1 : if (qgroup) {
1108 : ret = -EEXIST;
1109 : goto out;
1110 : }
1111 :
1112 1 : ret = add_qgroup_item(trans, quota_root, qgroupid);
1113 1 : if (ret)
1114 : goto out;
1115 :
1116 : spin_lock(&fs_info->qgroup_lock);
1117 1 : qgroup = add_qgroup_rb(fs_info, qgroupid);
1118 : spin_unlock(&fs_info->qgroup_lock);
1119 :
1120 1 : if (IS_ERR(qgroup))
1121 0 : ret = PTR_ERR(qgroup);
1122 : out:
1123 1 : mutex_unlock(&fs_info->qgroup_ioctl_lock);
1124 1 : return ret;
1125 : }
1126 :
1127 0 : int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
1128 0 : struct btrfs_fs_info *fs_info, u64 qgroupid)
1129 : {
1130 : struct btrfs_root *quota_root;
1131 : struct btrfs_qgroup *qgroup;
1132 : int ret = 0;
1133 :
1134 0 : mutex_lock(&fs_info->qgroup_ioctl_lock);
1135 0 : quota_root = fs_info->quota_root;
1136 0 : if (!quota_root) {
1137 : ret = -EINVAL;
1138 : goto out;
1139 : }
1140 :
1141 : qgroup = find_qgroup_rb(fs_info, qgroupid);
1142 0 : if (!qgroup) {
1143 : ret = -ENOENT;
1144 : goto out;
1145 : } else {
1146 : /* check if there are no relations to this qgroup */
1147 0 : if (!list_empty(&qgroup->groups) ||
1148 0 : !list_empty(&qgroup->members)) {
1149 : ret = -EBUSY;
1150 : goto out;
1151 : }
1152 : }
1153 0 : ret = del_qgroup_item(trans, quota_root, qgroupid);
1154 :
1155 : spin_lock(&fs_info->qgroup_lock);
1156 0 : del_qgroup_rb(quota_root->fs_info, qgroupid);
1157 : spin_unlock(&fs_info->qgroup_lock);
1158 : out:
1159 0 : mutex_unlock(&fs_info->qgroup_ioctl_lock);
1160 0 : return ret;
1161 : }
1162 :
1163 3 : int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
1164 3 : struct btrfs_fs_info *fs_info, u64 qgroupid,
1165 : struct btrfs_qgroup_limit *limit)
1166 : {
1167 : struct btrfs_root *quota_root;
1168 : struct btrfs_qgroup *qgroup;
1169 : int ret = 0;
1170 :
1171 3 : mutex_lock(&fs_info->qgroup_ioctl_lock);
1172 3 : quota_root = fs_info->quota_root;
1173 3 : if (!quota_root) {
1174 : ret = -EINVAL;
1175 : goto out;
1176 : }
1177 :
1178 : qgroup = find_qgroup_rb(fs_info, qgroupid);
1179 3 : if (!qgroup) {
1180 : ret = -ENOENT;
1181 : goto out;
1182 : }
1183 3 : ret = update_qgroup_limit_item(trans, quota_root, qgroupid,
1184 : limit->flags, limit->max_rfer,
1185 : limit->max_excl, limit->rsv_rfer,
1186 : limit->rsv_excl);
1187 3 : if (ret) {
1188 0 : fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1189 0 : btrfs_info(fs_info, "unable to update quota limit for %llu",
1190 : qgroupid);
1191 : }
1192 :
1193 : spin_lock(&fs_info->qgroup_lock);
1194 3 : qgroup->lim_flags = limit->flags;
1195 3 : qgroup->max_rfer = limit->max_rfer;
1196 3 : qgroup->max_excl = limit->max_excl;
1197 3 : qgroup->rsv_rfer = limit->rsv_rfer;
1198 3 : qgroup->rsv_excl = limit->rsv_excl;
1199 : spin_unlock(&fs_info->qgroup_lock);
1200 : out:
1201 3 : mutex_unlock(&fs_info->qgroup_ioctl_lock);
1202 3 : return ret;
1203 : }
1204 :
1205 : static int comp_oper_exist(struct btrfs_qgroup_operation *oper1,
1206 : struct btrfs_qgroup_operation *oper2)
1207 : {
1208 : /*
1209 : * Ignore seq and type here, we're looking for any operation
1210 : * at all related to this extent on that root.
1211 : */
1212 0 : if (oper1->bytenr < oper2->bytenr)
1213 : return -1;
1214 0 : if (oper1->bytenr > oper2->bytenr)
1215 : return 1;
1216 0 : if (oper1->ref_root < oper2->ref_root)
1217 : return -1;
1218 0 : if (oper1->ref_root > oper2->ref_root)
1219 : return 1;
1220 : return 0;
1221 : }
1222 :
1223 0 : static int qgroup_oper_exists(struct btrfs_fs_info *fs_info,
1224 0 : struct btrfs_qgroup_operation *oper)
1225 : {
1226 : struct rb_node *n;
1227 0 : struct btrfs_qgroup_operation *cur;
1228 : int cmp;
1229 :
1230 : spin_lock(&fs_info->qgroup_op_lock);
1231 0 : n = fs_info->qgroup_op_tree.rb_node;
1232 0 : while (n) {
1233 : cur = rb_entry(n, struct btrfs_qgroup_operation, n);
1234 : cmp = comp_oper_exist(cur, oper);
1235 0 : if (cmp < 0) {
1236 0 : n = n->rb_right;
1237 0 : } else if (cmp) {
1238 0 : n = n->rb_left;
1239 : } else {
1240 : spin_unlock(&fs_info->qgroup_op_lock);
1241 0 : return -EEXIST;
1242 : }
1243 : }
1244 : spin_unlock(&fs_info->qgroup_op_lock);
1245 0 : return 0;
1246 : }
1247 :
1248 68248 : static int comp_oper(struct btrfs_qgroup_operation *oper1,
1249 : struct btrfs_qgroup_operation *oper2)
1250 : {
1251 68248 : if (oper1->bytenr < oper2->bytenr)
1252 : return -1;
1253 66 : if (oper1->bytenr > oper2->bytenr)
1254 : return 1;
1255 0 : if (oper1->seq < oper2->seq)
1256 : return -1;
1257 0 : if (oper1->seq > oper2->seq)
1258 : return 1;
1259 0 : if (oper1->ref_root < oper2->ref_root)
1260 : return -1;
1261 0 : if (oper1->ref_root > oper2->ref_root)
1262 : return 1;
1263 0 : if (oper1->type < oper2->type)
1264 : return -1;
1265 0 : if (oper1->type > oper2->type)
1266 : return 1;
1267 0 : return 0;
1268 : }
1269 :
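/*
 * Aside: a quick userspace check (hypothetical, not kernel code) that
 * the comparator above defines a proper total order over the tuple
 * (bytenr, seq, ref_root, type) -- in particular that swapping the
 * operands flips the sign for every field, including seq.
 */
#include <assert.h>
#include <stdio.h>

struct op { unsigned long long bytenr, seq, ref_root; int type; };

static int cmp(const struct op *a, const struct op *b)
{
        if (a->bytenr != b->bytenr)
                return a->bytenr < b->bytenr ? -1 : 1;
        if (a->seq != b->seq)
                return a->seq < b->seq ? -1 : 1;
        if (a->ref_root != b->ref_root)
                return a->ref_root < b->ref_root ? -1 : 1;
        if (a->type != b->type)
                return a->type < b->type ? -1 : 1;
        return 0;
}

int main(void)
{
        struct op a = { 4096, 1, 257, 0 };
        struct op b = { 4096, 2, 257, 0 };

        assert(cmp(&a, &b) == -cmp(&b, &a));          /* antisymmetry */
        printf("%d %d\n", cmp(&a, &b), cmp(&b, &a));  /* prints: -1 1 */
        return 0;
}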
1270 12354 : static int insert_qgroup_oper(struct btrfs_fs_info *fs_info,
1271 : struct btrfs_qgroup_operation *oper)
1272 : {
1273 : struct rb_node **p;
1274 : struct rb_node *parent = NULL;
1275 : struct btrfs_qgroup_operation *cur;
1276 : int cmp;
1277 :
1278 : spin_lock(&fs_info->qgroup_op_lock);
1279 12354 : p = &fs_info->qgroup_op_tree.rb_node;
1280 92956 : while (*p) {
1281 : parent = *p;
1282 68248 : cur = rb_entry(parent, struct btrfs_qgroup_operation, n);
1283 68248 : cmp = comp_oper(cur, oper);
1284 68248 : if (cmp < 0) {
1285 68182 : p = &(*p)->rb_right;
1286 66 : } else if (cmp) {
1287 66 : p = &(*p)->rb_left;
1288 : } else {
1289 : spin_unlock(&fs_info->qgroup_op_lock);
1290 0 : return -EEXIST;
1291 : }
1292 : }
1293 12354 : rb_link_node(&oper->n, parent, p);
1294 12354 : rb_insert_color(&oper->n, &fs_info->qgroup_op_tree);
1295 : spin_unlock(&fs_info->qgroup_op_lock);
1296 12354 : return 0;
1297 : }
1298 :
1299 : /*
1300 : * Record a quota operation for processing later on.
1301 : * @trans: the transaction we are adding the delayed op to.
1302 : * @fs_info: the fs_info for this fs.
1303 : * @ref_root: the root of the reference we are acting on,
1304 : * @bytenr: the bytenr we are acting on.
1305 : * @num_bytes: the number of bytes in the reference.
1306 : * @type: the type of operation this is.
1307 : * @mod_seq: do we need to get a sequence number for looking up roots.
1308 : *
1309 : * We just add it to our trans qgroup_ref_list and carry on and process these
1310 : * operations in order at some later point. If the reference root isn't a fs
1311 : * root then we don't bother with doing anything.
1312 : *
1313 : * MUST BE HOLDING THE REF LOCK.
1314 : */
1315 113709 : int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
1316 : struct btrfs_fs_info *fs_info, u64 ref_root,
1317 : u64 bytenr, u64 num_bytes,
1318 : enum btrfs_qgroup_operation_type type, int mod_seq)
1319 : {
1320 : struct btrfs_qgroup_operation *oper;
1321 : int ret;
1322 :
1323 113709 : if (!is_fstree(ref_root) || !fs_info->quota_enabled)
1324 : return 0;
1325 :
1326 : oper = kmalloc(sizeof(*oper), GFP_NOFS);
1327 12354 : if (!oper)
1328 : return -ENOMEM;
1329 :
1330 12354 : oper->ref_root = ref_root;
1331 12354 : oper->bytenr = bytenr;
1332 12354 : oper->num_bytes = num_bytes;
1333 12354 : oper->type = type;
1334 24708 : oper->seq = atomic_inc_return(&fs_info->qgroup_op_seq);
1335 12354 : INIT_LIST_HEAD(&oper->elem.list);
1336 12354 : oper->elem.seq = 0;
1337 :
1338 12354 : if (type == BTRFS_QGROUP_OPER_SUB_SUBTREE) {
1339 : /*
1340 : * If any operation for this bytenr/ref_root combo
1341 : * exists, then we know it's not exclusively owned and
1342 : * shouldn't be queued up.
1343 : *
1344 : * This also catches the case where we have a cloned
1345 : * extent that gets queued up multiple times during
1346 : * drop snapshot.
1347 : */
1348 0 : if (qgroup_oper_exists(fs_info, oper)) {
1349 0 : kfree(oper);
1350 0 : return 0;
1351 : }
1352 : }
1353 :
1354 12354 : ret = insert_qgroup_oper(fs_info, oper);
1355 12354 : if (ret) {
1356 : /* Shouldn't happen so have an assert for developers */
1357 : ASSERT(0);
1358 0 : kfree(oper);
1359 0 : return ret;
1360 : }
1361 12354 : list_add_tail(&oper->list, &trans->qgroup_ref_list);
1362 :
1363 12354 : if (mod_seq)
1364 1186 : btrfs_get_tree_mod_seq(fs_info, &oper->elem);
1365 :
1366 : return 0;
1367 : }
1368 :
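/*
 * Aside: as recorded above, each operation ends up indexed twice --
 * in fs_info->qgroup_op_tree, ordered by comp_oper() so that later
 * operations on the same bytenr can be found with rb_next() (see
 * qgroup_account_deleted_refs() below), and on trans->qgroup_ref_list
 * for per-transaction processing, where the operation type selects
 * either the exclusive or the shared accounting path that follows.
 */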
1369 : /*
1370 : * The easy accounting, if we are adding/removing the only ref for an extent
1371 : * then this qgroup and all of the parent qgroups get their reference and
1372 : * exclusive counts adjusted.
1373 : */
1374 21580 : static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
1375 : struct btrfs_qgroup_operation *oper)
1376 : {
1377 : struct btrfs_qgroup *qgroup;
1378 : struct ulist *tmp;
1379 : struct btrfs_qgroup_list *glist;
1380 : struct ulist_node *unode;
1381 : struct ulist_iterator uiter;
1382 : int sign = 0;
1383 : int ret = 0;
1384 :
1385 10790 : tmp = ulist_alloc(GFP_NOFS);
1386 10790 : if (!tmp)
1387 : return -ENOMEM;
1388 :
1389 : spin_lock(&fs_info->qgroup_lock);
1390 10790 : if (!fs_info->quota_root)
1391 : goto out;
1392 10790 : qgroup = find_qgroup_rb(fs_info, oper->ref_root);
1393 10790 : if (!qgroup)
1394 : goto out;
1395 10789 : switch (oper->type) {
1396 : case BTRFS_QGROUP_OPER_ADD_EXCL:
1397 : sign = 1;
1398 6785 : break;
1399 : case BTRFS_QGROUP_OPER_SUB_EXCL:
1400 : sign = -1;
1401 4004 : break;
1402 : default:
1403 : ASSERT(0);
1404 : }
1405 10789 : qgroup->rfer += sign * oper->num_bytes;
1406 10789 : qgroup->rfer_cmpr += sign * oper->num_bytes;
1407 :
1408 10789 : WARN_ON(sign < 0 && qgroup->excl < oper->num_bytes);
1409 10789 : qgroup->excl += sign * oper->num_bytes;
1410 10789 : qgroup->excl_cmpr += sign * oper->num_bytes;
1411 :
1412 : qgroup_dirty(fs_info, qgroup);
1413 :
1414 : /* Get all of the parent groups that contain this qgroup */
1415 14601 : list_for_each_entry(glist, &qgroup->groups, next_group) {
1416 3812 : ret = ulist_add(tmp, glist->group->qgroupid,
1417 3812 : ptr_to_u64(glist->group), GFP_ATOMIC);
1418 3812 : if (ret < 0)
1419 : goto out;
1420 : }
1421 :
1422 : /* Iterate all of the parents and adjust their reference counts */
1423 10789 : ULIST_ITER_INIT(&uiter);
1424 25390 : while ((unode = ulist_next(tmp, &uiter))) {
1425 3812 : qgroup = u64_to_ptr(unode->aux);
1426 3812 : qgroup->rfer += sign * oper->num_bytes;
1427 3812 : qgroup->rfer_cmpr += sign * oper->num_bytes;
1428 3812 : qgroup->excl += sign * oper->num_bytes;
1429 3812 : if (sign < 0)
1430 10 : WARN_ON(qgroup->excl < oper->num_bytes);
1431 3812 : qgroup->excl_cmpr += sign * oper->num_bytes;
1432 : qgroup_dirty(fs_info, qgroup);
1433 :
1434 : /* Add any parents of the parents */
1435 3812 : list_for_each_entry(glist, &qgroup->groups, next_group) {
1436 0 : ret = ulist_add(tmp, glist->group->qgroupid,
1437 0 : ptr_to_u64(glist->group), GFP_ATOMIC);
1438 0 : if (ret < 0)
1439 : goto out;
1440 : }
1441 : }
1442 : ret = 0;
1443 : out:
1444 : spin_unlock(&fs_info->qgroup_lock);
1445 10790 : ulist_free(tmp);
1446 10790 : return ret;
1447 : }
1448 :
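/*
 * Aside: the exclusive path above with concrete (hypothetical) numbers.
 * Freeing a 16 KiB extent whose sole owner is qgroup 0/257, itself a
 * member of 1/100, subtracts the size from both counters of both
 * qgroups; the parent is reached through the ulist walk of ->groups.
 */
#include <stdio.h>

struct qg { long long rfer, excl; };

static void adjust(struct qg *q, int sign, long long bytes)
{
        q->rfer += sign * bytes;        /* referenced */
        q->excl += sign * bytes;        /* exclusive: sole owner, same delta */
}

int main(void)
{
        struct qg child  = { 1048576, 1048576 };        /* 0/257 */
        struct qg parent = { 4194304, 4194304 };        /* 1/100 */

        /* BTRFS_QGROUP_OPER_SUB_EXCL of a 16 KiB extent: sign = -1 */
        adjust(&child, -1, 16384);
        adjust(&parent, -1, 16384);

        printf("child  rfer=%lld excl=%lld\n", child.rfer, child.excl);
        printf("parent rfer=%lld excl=%lld\n", parent.rfer, parent.excl);
        return 0;       /* child 1032192/1032192, parent 4177920/4177920 */
}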
1449 : /*
1450 : * Walk all of the roots that pointed to our bytenr and adjust their refcnts as
1451 : * properly.
1452 : */
1453 9079 : static int qgroup_calc_old_refcnt(struct btrfs_fs_info *fs_info,
1454 : u64 root_to_skip, struct ulist *tmp,
1455 : struct ulist *roots, struct ulist *qgroups,
1456 : u64 seq, int *old_roots, int rescan)
1457 : {
1458 : struct ulist_node *unode;
1459 : struct ulist_iterator uiter;
1460 : struct ulist_node *tmp_unode;
1461 : struct ulist_iterator tmp_uiter;
1462 : struct btrfs_qgroup *qg;
1463 : int ret;
1464 :
1465 4258 : ULIST_ITER_INIT(&uiter);
1466 13337 : while ((unode = ulist_next(roots, &uiter))) {
1467 : /* We don't count our current root here */
1468 4821 : if (unode->val == root_to_skip)
1469 0 : continue;
1470 : qg = find_qgroup_rb(fs_info, unode->val);
1471 4821 : if (!qg)
1472 142 : continue;
1473 : /*
1474 : * We could have a pending removal of this same ref so we may
1475 : * not have actually found our ref root when doing
1476 : * btrfs_find_all_roots, which means we need to keep track of how many
1477 : * old roots we find in case we removed ours and added a
1478 : * different one at the same time. I don't think this could
1479 : * happen in practice but that sort of thinking leads to pain
1480 : * and suffering and to the dark side.
1481 : */
1482 4679 : (*old_roots)++;
1483 :
1484 4679 : ulist_reinit(tmp);
1485 4679 : ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg),
1486 : GFP_ATOMIC);
1487 4679 : if (ret < 0)
1488 : return ret;
1489 4679 : ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg), GFP_ATOMIC);
1490 4679 : if (ret < 0)
1491 : return ret;
1492 4679 : ULIST_ITER_INIT(&tmp_uiter);
1493 14037 : while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
1494 : struct btrfs_qgroup_list *glist;
1495 :
1496 4679 : qg = u64_to_ptr(tmp_unode->aux);
1497 : /*
1498 : * We use this sequence number to keep from having to
1499 : * run the whole list and 0 out the refcnt every time.
1500 : * We basically use the sequence as the known 0 count and
1501 : * then add 1 every time we see a qgroup. This is how we
1502 : * get how many of the roots actually point up to the
1503 : * upper level qgroups in order to determine exclusive
1504 : * counts.
1505 : *
1506 : * For rescan we want to set old_refcnt to seq so our
1507 : * exclusive calculations end up correct.
1508 : */
1509 4679 : if (rescan)
1510 3612 : qg->old_refcnt = seq;
1511 1067 : else if (qg->old_refcnt < seq)
1512 1067 : qg->old_refcnt = seq + 1;
1513 : else
1514 0 : qg->old_refcnt++;
1515 :
1516 4679 : if (qg->new_refcnt < seq)
1517 4679 : qg->new_refcnt = seq + 1;
1518 : else
1519 0 : qg->new_refcnt++;
1520 4679 : list_for_each_entry(glist, &qg->groups, next_group) {
1521 0 : ret = ulist_add(qgroups, glist->group->qgroupid,
1522 0 : ptr_to_u64(glist->group),
1523 : GFP_ATOMIC);
1524 0 : if (ret < 0)
1525 : return ret;
1526 0 : ret = ulist_add(tmp, glist->group->qgroupid,
1527 0 : ptr_to_u64(glist->group),
1528 : GFP_ATOMIC);
1529 0 : if (ret < 0)
1530 : return ret;
1531 : }
1532 : }
1533 : }
1534 : return 0;
1535 : }
1536 :
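/*
 * Aside: a minimal userspace model (hypothetical values) of the "seq as
 * the known 0 count" trick described in the comment above.  Any refcnt
 * at or below seq means "not seen this round", so nothing has to be
 * reset between accounting rounds.
 */
#include <stdio.h>

/* how many roots referenced this qgroup in the current round; mirrors
 * the cur_old_count/cur_new_count logic in qgroup_adjust_counters() */
static unsigned long long seen(unsigned long long refcnt, unsigned long long seq)
{
        return refcnt < seq ? 0 : refcnt - seq;
}

static void bump(unsigned long long *refcnt, unsigned long long seq)
{
        if (*refcnt < seq)
                *refcnt = seq + 1;      /* first root seen this round */
        else
                (*refcnt)++;            /* another root references it too */
}

int main(void)
{
        unsigned long long old_refcnt = 0, new_refcnt = 0;
        unsigned long long seq = 1000; /* stands in for fs_info->qgroup_seq */

        bump(&old_refcnt, seq);         /* one root referenced the extent */
        bump(&new_refcnt, seq);         /* it still does afterwards... */
        bump(&new_refcnt, seq);         /* ...and a second root was added */

        printf("old=%llu new=%llu\n", seen(old_refcnt, seq),
               seen(new_refcnt, seq));  /* prints: old=1 new=2 */
        return 0;
}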
1537 : /*
1538 : * We need to walk forward in our operation tree and account for any roots that
1539 : * were deleted after we made this operation.
1540 : */
1541 1564 : static int qgroup_account_deleted_refs(struct btrfs_fs_info *fs_info,
1542 : struct btrfs_qgroup_operation *oper,
1543 : struct ulist *tmp,
1544 : struct ulist *qgroups, u64 seq,
1545 : int *old_roots)
1546 : {
1547 : struct ulist_node *unode;
1548 : struct ulist_iterator uiter;
1549 : struct btrfs_qgroup *qg;
1550 : struct btrfs_qgroup_operation *tmp_oper;
1551 : struct rb_node *n;
1552 : int ret;
1553 :
1554 1564 : ulist_reinit(tmp);
1555 :
1556 : /*
1557 : * We only walk forward in the tree since we're only interested in
1558 : * removals that happened _after_ our operation.
1559 : */
1560 : spin_lock(&fs_info->qgroup_op_lock);
1561 1564 : n = rb_next(&oper->n);
1562 : spin_unlock(&fs_info->qgroup_op_lock);
1563 1564 : if (!n)
1564 : return 0;
1565 1406 : tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n);
1566 2812 : while (tmp_oper->bytenr == oper->bytenr) {
1567 : /*
1568 : * If it's not a removal we don't care, additions work out
1569 : * properly with our refcnt tracking.
1570 : */
1571 0 : if (tmp_oper->type != BTRFS_QGROUP_OPER_SUB_SHARED &&
1572 : tmp_oper->type != BTRFS_QGROUP_OPER_SUB_EXCL)
1573 : goto next;
1574 0 : qg = find_qgroup_rb(fs_info, tmp_oper->ref_root);
1575 0 : if (!qg)
1576 : goto next;
1577 0 : ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg),
1578 : GFP_ATOMIC);
1579 0 : if (ret) {
1580 0 : if (ret < 0)
1581 : return ret;
1582 : /*
1583 : * We only want to increase old_roots if this qgroup is
1584 : * not already in the list of qgroups. If it is already
1585 : * there then that means it must have been re-added or
1586 : * the delete will be discarded because we had an
1587 : * existing ref that we haven't looked up yet. In this
1588 : * case we don't want to increase old_roots. So if ret
1589 : * == 1 then we know that this is the first time we've
1590 : * seen this qgroup and we can bump the old_roots.
1591 : */
1592 0 : (*old_roots)++;
1593 0 : ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg),
1594 : GFP_ATOMIC);
1595 0 : if (ret < 0)
1596 : return ret;
1597 : }
1598 : next:
1599 : spin_lock(&fs_info->qgroup_op_lock);
1600 0 : n = rb_next(&tmp_oper->n);
1601 : spin_unlock(&fs_info->qgroup_op_lock);
1602 0 : if (!n)
1603 : break;
1604 0 : tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n);
1605 : }
1606 :
1607 : /* Ok now process the qgroups we found */
1608 1406 : ULIST_ITER_INIT(&uiter);
1609 2812 : while ((unode = ulist_next(tmp, &uiter))) {
1610 : struct btrfs_qgroup_list *glist;
1611 :
1612 0 : qg = u64_to_ptr(unode->aux);
1613 0 : if (qg->old_refcnt < seq)
1614 0 : qg->old_refcnt = seq + 1;
1615 : else
1616 0 : qg->old_refcnt++;
1617 0 : if (qg->new_refcnt < seq)
1618 0 : qg->new_refcnt = seq + 1;
1619 : else
1620 0 : qg->new_refcnt++;
1621 0 : list_for_each_entry(glist, &qg->groups, next_group) {
1622 0 : ret = ulist_add(qgroups, glist->group->qgroupid,
1623 0 : ptr_to_u64(glist->group), GFP_ATOMIC);
1624 0 : if (ret < 0)
1625 : return ret;
1626 0 : ret = ulist_add(tmp, glist->group->qgroupid,
1627 0 : ptr_to_u64(glist->group), GFP_ATOMIC);
1628 0 : if (ret < 0)
1629 : return ret;
1630 : }
1631 : }
1632 : return 0;
1633 : }
1634 :
1635 : /* Add refcnt for the newly added reference. */
1636 1564 : static int qgroup_calc_new_refcnt(struct btrfs_fs_info *fs_info,
1637 : struct btrfs_qgroup_operation *oper,
1638 : struct btrfs_qgroup *qgroup,
1639 : struct ulist *tmp, struct ulist *qgroups,
1640 : u64 seq)
1641 : {
1642 : struct ulist_node *unode;
1643 : struct ulist_iterator uiter;
1644 : struct btrfs_qgroup *qg;
1645 : int ret;
1646 :
1647 1564 : ulist_reinit(tmp);
1648 1564 : ret = ulist_add(qgroups, qgroup->qgroupid, ptr_to_u64(qgroup),
1649 : GFP_ATOMIC);
1650 1564 : if (ret < 0)
1651 : return ret;
1652 1564 : ret = ulist_add(tmp, qgroup->qgroupid, ptr_to_u64(qgroup),
1653 : GFP_ATOMIC);
1654 1564 : if (ret < 0)
1655 : return ret;
1656 1564 : ULIST_ITER_INIT(&uiter);
1657 3128 : while ((unode = ulist_next(tmp, &uiter))) {
1658 : struct btrfs_qgroup_list *glist;
1659 :
1660 1564 : qg = u64_to_ptr(unode->aux);
1661 1564 : if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) {
1662 0 : if (qg->new_refcnt < seq)
1663 0 : qg->new_refcnt = seq + 1;
1664 : else
1665 0 : qg->new_refcnt++;
1666 : } else {
1667 1564 : if (qg->old_refcnt < seq)
1668 1564 : qg->old_refcnt = seq + 1;
1669 : else
1670 0 : qg->old_refcnt++;
1671 : }
1672 1564 : list_for_each_entry(glist, &qg->groups, next_group) {
1673 0 : ret = ulist_add(tmp, glist->group->qgroupid,
1674 0 : ptr_to_u64(glist->group), GFP_ATOMIC);
1675 0 : if (ret < 0)
1676 : return ret;
1677 0 : ret = ulist_add(qgroups, glist->group->qgroupid,
1678 0 : ptr_to_u64(glist->group), GFP_ATOMIC);
1679 0 : if (ret < 0)
1680 : return ret;
1681 : }
1682 : }
1683 : return 0;
1684 : }
1685 :
1686 : /*
1687 : * This adjusts the counters for all referenced qgroups if need be.
1688 : */
1689 4258 : static int qgroup_adjust_counters(struct btrfs_fs_info *fs_info,
1690 : u64 root_to_skip, u64 num_bytes,
1691 : struct ulist *qgroups, u64 seq,
1692 : int old_roots, int new_roots, int rescan)
1693 : {
1694 : struct ulist_node *unode;
1695 : struct ulist_iterator uiter;
1696 : struct btrfs_qgroup *qg;
1697 : u64 cur_new_count, cur_old_count;
1698 :
1699 4258 : ULIST_ITER_INIT(&uiter);
1700 10501 : while ((unode = ulist_next(qgroups, &uiter))) {
1701 : bool dirty = false;
1702 :
1703 6243 : qg = u64_to_ptr(unode->aux);
1704 : /*
1705 : * Wasn't referenced before but is now, add to the reference
1706 : * counters.
1707 : */
1708 6243 : if (qg->old_refcnt <= seq && qg->new_refcnt > seq) {
1709 3612 : qg->rfer += num_bytes;
1710 3612 : qg->rfer_cmpr += num_bytes;
1711 : dirty = true;
1712 : }
1713 :
1714 : /*
1715 : * Was referenced before but isn't now, subtract from the
1716 : * reference counters.
1717 : */
1718 6243 : if (qg->old_refcnt > seq && qg->new_refcnt <= seq) {
1719 1564 : qg->rfer -= num_bytes;
1720 1564 : qg->rfer_cmpr -= num_bytes;
1721 : dirty = true;
1722 : }
1723 :
1724 6243 : if (qg->old_refcnt < seq)
1725 : cur_old_count = 0;
1726 : else
1727 6243 : cur_old_count = qg->old_refcnt - seq;
1728 6243 : if (qg->new_refcnt < seq)
1729 : cur_new_count = 0;
1730 : else
1731 4679 : cur_new_count = qg->new_refcnt - seq;
1732 :
1733 : /*
1734 : * If our refcount was the same as the roots previously but our
1735 : * new count isn't the same as the number of roots now then we
1736 : * went from having a exclusive reference on this range to not.
1737 : */
1738 6740 : if (old_roots && cur_old_count == old_roots &&
1739 994 : (cur_new_count != new_roots || new_roots == 0)) {
1740 497 : WARN_ON(cur_new_count != new_roots && new_roots == 0);
1741 497 : qg->excl -= num_bytes;
1742 497 : qg->excl_cmpr -= num_bytes;
1743 : dirty = true;
1744 : }
1745 :
1746 : /*
1747 : * If we didn't reference all the roots before but now we do we
1748 : * have an exclusive reference to this range.
1749 : */
1750 6243 : if ((!old_roots || (old_roots && cur_old_count != old_roots))
1751 5746 : && cur_new_count == new_roots) {
1752 2545 : qg->excl += num_bytes;
1753 2545 : qg->excl_cmpr += num_bytes;
1754 : dirty = true;
1755 : }
1756 :
1757 6243 : if (dirty)
1758 : qgroup_dirty(fs_info, qg);
1759 : }
1760 4258 : return 0;
1761 : }
1762 :
1763 : /*
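/*
 * Aside: the two exclusive-count conditions above, modeled standalone
 * with hypothetical inputs.  cur_old/cur_new are how many of the old
 * and new roots this qgroup saw (refcnt minus seq); old_roots and
 * new_roots are the totals for the extent.
 */
#include <stdio.h>

static int excl_delta_sign(unsigned long long cur_old,
                           unsigned long long cur_new,
                           int old_roots, int new_roots)
{
        /* saw every old root but no longer sees every new one:
         * exclusive ownership is lost */
        if (old_roots && cur_old == (unsigned long long)old_roots &&
            (cur_new != (unsigned long long)new_roots || new_roots == 0))
                return -1;
        /* did not see every old root but now sees every new one:
         * exclusive ownership is gained */
        if ((!old_roots || cur_old != (unsigned long long)old_roots) &&
            cur_new == (unsigned long long)new_roots)
                return 1;
        return 0;
}

int main(void)
{
        /* extent shared by two roots, one of them dropped: the
         * survivor's qgroup saw 1 of 2 roots before, 1 of 1 after */
        printf("%d\n", excl_delta_sign(1, 1, 2, 1)); /* 1: now exclusive */
        /* the dropped root's qgroup saw 1 of 2 before, 0 of 1 after */
        printf("%d\n", excl_delta_sign(1, 0, 2, 1)); /* 0: excl unchanged */
        return 0;
}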
1764 : * If we removed a data extent and there were other references for that bytenr
1765 : * then we need to lookup all referenced roots to make sure we still don't
1766 : * reference this bytenr. If we do then we can just discard this operation.
1767 : */
1768 1186 : static int check_existing_refs(struct btrfs_trans_handle *trans,
1769 : struct btrfs_fs_info *fs_info,
1770 : struct btrfs_qgroup_operation *oper)
1771 : {
1772 1186 : struct ulist *roots = NULL;
1773 : struct ulist_node *unode;
1774 : struct ulist_iterator uiter;
1775 : int ret = 0;
1776 :
1777 1186 : ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr,
1778 : oper->elem.seq, &roots);
1779 1186 : if (ret < 0)
1780 : return ret;
1781 : ret = 0;
1782 :
1783 1186 : ULIST_ITER_INIT(&uiter);
1784 3129 : while ((unode = ulist_next(roots, &uiter))) {
1785 757 : if (unode->val == oper->ref_root) {
1786 : ret = 1;
1787 : break;
1788 : }
1789 : }
1790 1186 : ulist_free(roots);
1791 1186 : btrfs_put_tree_mod_seq(fs_info, &oper->elem);
1792 :
1793 1186 : return ret;
1794 : }
1795 :
1796 : /*
1797 : * If we share a reference across multiple roots then we may need to adjust
1798 : * various qgroups referenced and exclusive counters. The basic premise is this
1799 : *
1800 : * 1) We have seq to represent a 0 count. Instead of looping through all of the
1801 : * qgroups and resetting their refcount to 0 we just constantly bump this
1802 : * sequence number to act as the base reference count. This means that if
1803 : * anybody is equal to or below this sequence they were never referenced. We
1804 : * jack this sequence up by the number of roots we found each time in order to
1805 : * make sure we don't have any overlap.
1806 : *
1807 : * 2) We first search all the roots that reference the area _except_ the root
1808 : * we're acting on currently. This makes up the old_refcnt of all the qgroups
1809 : * before.
1810 : *
1811 : * 3) We walk all of the qgroups referenced by the root we are currently acting
1812 : * on, and will either adjust old_refcnt in the case of a removal or the
1813 : * new_refcnt in the case of an addition.
1814 : *
1815 : * 4) Finally we walk all the qgroups that are referenced by this range
1816 : * including the root we are acting on currently. We will adjust the counters
1817 : * based on the number of roots we had and will have after this operation.
1818 : *
1819 : * Take this example as an illustration
1820 : *
1821 : * [qgroup 1/0]
1822 : * / | \
1823 : * [qg 0/0] [qg 0/1] [qg 0/2]
1824 : * \ | /
1825 : * [ extent ]
1826 : *
1827 : * Say we are adding a reference that is covered by qg 0/0. The first step
1828 : * would give a refcnt of 1 to qg 0/1 and 0/2 and a refcnt of 2 to qg 1/0 with
1829 : * old_roots being 2. Because it is adding new_roots will be 1. We then go
1830 : * through qg 0/0 which will get the new_refcnt set to 1 and add 1 to qg 1/0's
1831 : * new_refcnt, bringing it to 3. We then walk through all of the qgroups, we
1832 : * notice that the old refcnt for qg 0/0 < the new refcnt, so we added a
1833 : * reference and thus must add the size to the referenced bytes. Everything
1834 : * else is the same so nothing else changes.
1835 : */
1836 1564 : static int qgroup_shared_accounting(struct btrfs_trans_handle *trans,
1837 1564 : struct btrfs_fs_info *fs_info,
1838 : struct btrfs_qgroup_operation *oper)
1839 : {
1840 1564 : struct ulist *roots = NULL;
1841 : struct ulist *qgroups, *tmp;
1842 : struct btrfs_qgroup *qgroup;
1843 1564 : struct seq_list elem = {};
1844 : u64 seq;
1845 1564 : int old_roots = 0;
1846 : int new_roots = 0;
1847 : int ret = 0;
1848 :
1849 1564 : if (oper->elem.seq) {
1850 1186 : ret = check_existing_refs(trans, fs_info, oper);
1851 1186 : if (ret < 0)
1852 : return ret;
1853 1186 : if (ret)
1854 : return 0;
1855 : }
1856 :
1857 1564 : qgroups = ulist_alloc(GFP_NOFS);
1858 1564 : if (!qgroups)
1859 : return -ENOMEM;
1860 :
1861 1564 : tmp = ulist_alloc(GFP_NOFS);
1862 1564 : if (!tmp) {
1863 0 : ulist_free(qgroups);
1864 0 : return -ENOMEM;
1865 : }
1866 :
1867 1564 : btrfs_get_tree_mod_seq(fs_info, &elem);
1868 1564 : ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, elem.seq,
1869 : &roots);
1870 1564 : btrfs_put_tree_mod_seq(fs_info, &elem);
1871 1564 : if (ret < 0) {
1872 0 : ulist_free(qgroups);
1873 0 : ulist_free(tmp);
1874 0 : return ret;
1875 : }
1876 : spin_lock(&fs_info->qgroup_lock);
1877 1564 : qgroup = find_qgroup_rb(fs_info, oper->ref_root);
1878 1564 : if (!qgroup)
1879 : goto out;
1880 1564 : seq = fs_info->qgroup_seq;
1881 :
1882 : /*
1883 : * So roots is the list of all the roots currently pointing at the
1884 : * bytenr: it includes the ref we are adding if this is an addition, but
1885 : * not if we are removing a ref. We pass in the ref_root to skip that
1886 : * root in our calculations. We set both old_refcnt and new_refcnt
1887 : * because the state before this operation is unknown, and it doesn't
1888 : * matter except...
1889 : */
1890 1564 : ret = qgroup_calc_old_refcnt(fs_info, oper->ref_root, tmp, roots, qgroups,
1891 : seq, &old_roots, 0);
1892 1564 : if (ret < 0)
1893 : goto out;
1894 :
1895 : /*
1896 : * Now adjust the refcounts of the qgroups that care about this
1897 : * reference, either the old_count in the case of removal or new_count
1898 : * in the case of an addition.
1899 : */
1900 1564 : ret = qgroup_calc_new_refcnt(fs_info, oper, qgroup, tmp, qgroups,
1901 : seq);
1902 1564 : if (ret < 0)
1903 : goto out;
1904 :
1905 : /*
1906 : * ...in the case of removals. If a removal was queued before we got
1907 : * around to processing this operation, we need to find that operation
1908 : * and count its references as if they still existed, so we don't end
1909 : * up corrupting the exclusive counts. Then, when we eventually process
1910 : * the delete, everything will be consistent and we can account for
1911 : * whatever exclusive changes need to be made there. We also have to
1912 : * pass in old_roots so we have an accurate count of the roots as it
1913 : * pertains to this operation's view of the world.
1914 : */
1915 1564 : ret = qgroup_account_deleted_refs(fs_info, oper, tmp, qgroups, seq,
1916 : &old_roots);
1917 1564 : if (ret < 0)
1918 : goto out;
1919 :
1920 : /*
1921 : * We are adding our root, need to adjust up the number of roots,
1922 : * otherwise old_roots is the number of roots we want.
1923 : */
1924 1564 : if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) {
1925 0 : new_roots = old_roots + 1;
1926 : } else {
1927 1564 : new_roots = old_roots;
1928 1564 : old_roots++;
1929 : }
1930 1564 : fs_info->qgroup_seq += old_roots + 1;
1931 :
1932 :
1933 : /*
1934 : * And now the magic happens, bless Arne for having a pretty elegant
1935 : * solution for this.
1936 : */
1937 1564 : qgroup_adjust_counters(fs_info, oper->ref_root, oper->num_bytes,
1938 : qgroups, seq, old_roots, new_roots, 0);
1939 : out:
1940 : spin_unlock(&fs_info->qgroup_lock);
1941 1564 : ulist_free(qgroups);
1942 1564 : ulist_free(roots);
1943 1564 : ulist_free(tmp);
1944 1564 : return ret;
1945 : }
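/*
 * The seq-as-zero trick from the comment above can be modeled in a few
 * lines of plain C. This is a hedged userspace sketch, not the kernel
 * implementation: the counter array and the effective()/bump() helpers
 * are illustrative assumptions standing in for the per-qgroup
 * old_refcnt/new_refcnt fields and fs_info->qgroup_seq.
 */
#include <stdio.h>

static unsigned long long seq;           /* global "logical zero" baseline */
static unsigned long long refcnt[4];     /* per-qgroup scratch counters */

/* Anything at or below seq was never touched in the current round. */
static unsigned long long effective(int qg)
{
	return refcnt[qg] <= seq ? 0 : refcnt[qg] - seq;
}

/* Increment a counter relative to the current baseline. */
static void bump(int qg)
{
	if (refcnt[qg] <= seq)
		refcnt[qg] = seq + 1;
	else
		refcnt[qg]++;
}

int main(void)
{
	bump(0); bump(0); bump(2);
	printf("qg0=%llu qg2=%llu qg3=%llu\n",
	       effective(0), effective(2), effective(3)); /* 2 1 0 */
	/* "Reset" every counter at once by advancing past the largest
	 * count used this round -- the kernel bumps by old_roots + 1. */
	seq += 3;
	printf("after reset: qg0=%llu\n", effective(0));   /* 0 again */
	return 0;
}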
1946 :
1947 : /*
1948 : * Process a reference to a shared subtree. This type of operation is
1949 : * queued during snapshot removal when we encounter extents which are
1950 : * shared between more than one root.
1951 : */
1952 0 : static int qgroup_subtree_accounting(struct btrfs_trans_handle *trans,
1953 0 : struct btrfs_fs_info *fs_info,
1954 : struct btrfs_qgroup_operation *oper)
1955 : {
1956 0 : struct ulist *roots = NULL;
1957 : struct ulist_node *unode;
1958 : struct ulist_iterator uiter;
1959 : struct btrfs_qgroup_list *glist;
1960 : struct ulist *parents;
1961 : int ret = 0;
1962 : int err;
1963 : struct btrfs_qgroup *qg;
1964 : u64 root_obj = 0;
1965 0 : struct seq_list elem = {};
1966 :
1967 0 : parents = ulist_alloc(GFP_NOFS);
1968 0 : if (!parents)
1969 : return -ENOMEM;
1970 :
1971 0 : btrfs_get_tree_mod_seq(fs_info, &elem);
1972 0 : ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr,
1973 : elem.seq, &roots);
1974 0 : btrfs_put_tree_mod_seq(fs_info, &elem);
1975 0 : if (ret < 0)
1976 : goto out;
1977 :
1978 0 : if (roots->nnodes != 1)
1979 : goto out;
1980 :
1981 0 : ULIST_ITER_INIT(&uiter);
1982 0 : unode = ulist_next(roots, &uiter); /* Only want 1 so no need to loop */
1983 : /*
1984 : * If we find our ref root then that means all refs
1985 : * this extent has to the root have not yet been
1986 : * deleted. In that case, we do nothing and let the
1987 : * last ref for this bytenr drive our update.
1988 : *
1989 : * This can happen for example if an extent is
1990 : * referenced multiple times in a snapshot (clone,
1991 : * etc). If we are in the middle of snapshot removal,
1992 : * queued updates for such an extent will find the
1993 : * root if we have not yet finished removing the
1994 : * snapshot.
1995 : */
1996 0 : if (unode->val == oper->ref_root)
1997 : goto out;
1998 :
1999 : root_obj = unode->val;
2000 0 : BUG_ON(!root_obj);
2001 :
2002 : spin_lock(&fs_info->qgroup_lock);
2003 : qg = find_qgroup_rb(fs_info, root_obj);
2004 0 : if (!qg)
2005 : goto out_unlock;
2006 :
2007 0 : qg->excl += oper->num_bytes;
2008 0 : qg->excl_cmpr += oper->num_bytes;
2009 : qgroup_dirty(fs_info, qg);
2010 :
2011 : /*
2012 : * Adjust counts for parent groups. First we find all
2013 : * parents, then in the 2nd loop we do the adjustment
2014 : * while adding parents of the parents to our ulist.
2015 : */
2016 0 : list_for_each_entry(glist, &qg->groups, next_group) {
2017 0 : err = ulist_add(parents, glist->group->qgroupid,
2018 0 : ptr_to_u64(glist->group), GFP_ATOMIC);
2019 0 : if (err < 0) {
2020 : ret = err;
2021 : goto out_unlock;
2022 : }
2023 : }
2024 :
2025 0 : ULIST_ITER_INIT(&uiter);
2026 0 : while ((unode = ulist_next(parents, &uiter))) {
2027 0 : qg = u64_to_ptr(unode->aux);
2028 0 : qg->excl += oper->num_bytes;
2029 0 : qg->excl_cmpr += oper->num_bytes;
2030 : qgroup_dirty(fs_info, qg);
2031 :
2032 : /* Add any parents of the parents */
2033 0 : list_for_each_entry(glist, &qg->groups, next_group) {
2034 0 : err = ulist_add(parents, glist->group->qgroupid,
2035 0 : ptr_to_u64(glist->group), GFP_ATOMIC);
2036 0 : if (err < 0) {
2037 : ret = err;
2038 : goto out_unlock;
2039 : }
2040 : }
2041 : }
2042 :
2043 : out_unlock:
2044 : spin_unlock(&fs_info->qgroup_lock);
2045 :
2046 : out:
2047 0 : ulist_free(roots);
2048 0 : ulist_free(parents);
2049 0 : return ret;
2050 : }
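/*
 * The two-phase parent walk above (seed a deduplicating worklist with the
 * direct parents, then append parents-of-parents while iterating) is a
 * plain breadth-first traversal. A hedged userspace sketch follows; the
 * toy parent matrix and array-backed worklist are assumptions standing in
 * for the qgroup relation lists and the kernel's ulist.
 */
#include <stdio.h>

#define NGROUPS 6

/* parent[i][j] != 0 means group j is a parent of group i (toy graph) */
static const int parent[NGROUPS][NGROUPS] = {
	[0] = { [3] = 1 },            /* group 0 is a member of group 3 */
	[3] = { [4] = 1, [5] = 1 },   /* group 3 is a member of 4 and 5 */
};

int main(void)
{
	int work[NGROUPS], seen[NGROUPS] = { 0 };
	int head = 0, tail = 0, start = 0;

	/* phase 1: seed the worklist with the direct parents */
	for (int p = 0; p < NGROUPS; p++)
		if (parent[start][p] && !seen[p]) {
			seen[p] = 1;
			work[tail++] = p;
		}

	/* phase 2: adjust each parent, appending parents-of-parents;
	 * the seen[] check is the dedup that makes ulist_add terminate */
	while (head < tail) {
		int qg = work[head++];

		printf("adjust excl counters of group %d\n", qg);
		for (int p = 0; p < NGROUPS; p++)
			if (parent[qg][p] && !seen[p]) {
				seen[p] = 1;
				work[tail++] = p;
			}
	}
	return 0;
}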
2051 :
2052 : /*
2053 : * btrfs_qgroup_account is called for every ref that is added to or deleted
2054 : * from the fs. First, all roots referencing the extent are searched, and
2055 : * then the space is apportioned among the different roots. The operation is
2056 : * dispatched to the exclusive, shared, or subtree accounting routine.
2057 : */
2058 12354 : static int btrfs_qgroup_account(struct btrfs_trans_handle *trans,
2059 : struct btrfs_fs_info *fs_info,
2060 : struct btrfs_qgroup_operation *oper)
2061 : {
2062 : int ret = 0;
2063 :
2064 12354 : if (!fs_info->quota_enabled)
2065 : return 0;
2066 :
2067 12354 : BUG_ON(!fs_info->quota_root);
2068 :
2069 12354 : mutex_lock(&fs_info->qgroup_rescan_lock);
2070 12354 : if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
2071 0 : if (fs_info->qgroup_rescan_progress.objectid <= oper->bytenr) {
2072 0 : mutex_unlock(&fs_info->qgroup_rescan_lock);
2073 0 : return 0;
2074 : }
2075 : }
2076 12354 : mutex_unlock(&fs_info->qgroup_rescan_lock);
2077 :
2078 : ASSERT(is_fstree(oper->ref_root));
2079 :
2080 12354 : switch (oper->type) {
2081 : case BTRFS_QGROUP_OPER_ADD_EXCL:
2082 : case BTRFS_QGROUP_OPER_SUB_EXCL:
2083 10790 : ret = qgroup_excl_accounting(fs_info, oper);
2084 10790 : break;
2085 : case BTRFS_QGROUP_OPER_ADD_SHARED:
2086 : case BTRFS_QGROUP_OPER_SUB_SHARED:
2087 1564 : ret = qgroup_shared_accounting(trans, fs_info, oper);
2088 1564 : break;
2089 : case BTRFS_QGROUP_OPER_SUB_SUBTREE:
2090 0 : ret = qgroup_subtree_accounting(trans, fs_info, oper);
2091 0 : break;
2092 : default:
2093 : ASSERT(0);
2094 : }
2095 12354 : return ret;
2096 : }
2097 :
2098 : /*
2099 : * Needs to be called every time we run delayed refs, even if there is an
2100 : * error, in order to clean up outstanding operations.
2101 : */
2102 41481 : int btrfs_delayed_qgroup_accounting(struct btrfs_trans_handle *trans,
2103 : struct btrfs_fs_info *fs_info)
2104 : {
2105 : struct btrfs_qgroup_operation *oper;
2106 : int ret = 0;
2107 :
2108 149151 : while (!list_empty(&trans->qgroup_ref_list)) {
2109 12354 : oper = list_first_entry(&trans->qgroup_ref_list,
2110 : struct btrfs_qgroup_operation, list);
2111 12354 : list_del_init(&oper->list);
2112 12354 : if (!ret || !trans->aborted)
2113 12354 : ret = btrfs_qgroup_account(trans, fs_info, oper);
2114 : spin_lock(&fs_info->qgroup_op_lock);
2115 12354 : rb_erase(&oper->n, &fs_info->qgroup_op_tree);
2116 : spin_unlock(&fs_info->qgroup_op_lock);
2117 12354 : btrfs_put_tree_mod_seq(fs_info, &oper->elem);
2118 12354 : kfree(oper);
2119 : }
2120 41481 : return ret;
2121 : }
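/*
 * The loop above encodes a "drain even on error" rule: every queued
 * operation is unlinked and freed unconditionally, while results are only
 * accumulated until the first failure. A hedged userspace sketch of that
 * rule (simplified: the real code also consults trans->aborted, and the
 * op struct here is an assumed stand-in):
 */
#include <stdio.h>
#include <stdlib.h>

struct op { int id; struct op *next; };

static int process(struct op *o)
{
	return o->id == 1 ? -1 : 0;	/* pretend op 1 fails */
}

static int drain(struct op **head)
{
	int ret = 0;

	while (*head) {
		struct op *o = *head;

		*head = o->next;		/* always unlink... */
		if (!ret)
			ret = process(o);	/* ...account until an error */
		free(o);			/* ...and always free */
	}
	return ret;				/* first error wins */
}

int main(void)
{
	struct op *head = NULL;

	for (int i = 2; i >= 0; i--) {
		struct op *o = malloc(sizeof(*o));
		o->id = i;
		o->next = head;
		head = o;
	}
	printf("drain ret=%d, queue empty=%d\n", drain(&head), head == NULL);
	return 0;
}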
2122 :
2123 : /*
2124 : * called from commit_transaction. Writes all changed qgroups to disk.
2125 : */
2126 2098 : int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
2127 : struct btrfs_fs_info *fs_info)
2128 : {
2129 2098 : struct btrfs_root *quota_root = fs_info->quota_root;
2130 : int ret = 0;
2131 : int start_rescan_worker = 0;
2132 :
2133 2098 : if (!quota_root)
2134 : goto out;
2135 :
2136 127 : if (!fs_info->quota_enabled && fs_info->pending_quota_state)
2137 : start_rescan_worker = 1;
2138 :
2139 127 : fs_info->quota_enabled = fs_info->pending_quota_state;
2140 :
2141 : spin_lock(&fs_info->qgroup_lock);
2142 554 : while (!list_empty(&fs_info->dirty_qgroups)) {
2143 : struct btrfs_qgroup *qgroup;
2144 150 : qgroup = list_first_entry(&fs_info->dirty_qgroups,
2145 : struct btrfs_qgroup, dirty);
2146 150 : list_del_init(&qgroup->dirty);
2147 : spin_unlock(&fs_info->qgroup_lock);
2148 150 : ret = update_qgroup_info_item(trans, quota_root, qgroup);
2149 150 : if (ret)
2150 0 : fs_info->qgroup_flags |=
2151 : BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2152 : spin_lock(&fs_info->qgroup_lock);
2153 : }
2154 127 : if (fs_info->quota_enabled)
2155 127 : fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON;
2156 : else
2157 0 : fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
2158 : spin_unlock(&fs_info->qgroup_lock);
2159 :
2160 127 : ret = update_qgroup_status_item(trans, fs_info, quota_root);
2161 127 : if (ret)
2162 0 : fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2163 :
2164 127 : if (!ret && start_rescan_worker) {
2165 6 : ret = qgroup_rescan_init(fs_info, 0, 1);
2166 6 : if (!ret) {
2167 6 : qgroup_rescan_zero_tracking(fs_info);
2168 6 : btrfs_queue_work(fs_info->qgroup_rescan_workers,
2169 : &fs_info->qgroup_rescan_work);
2170 : }
2171 : ret = 0;
2172 : }
2173 :
2174 : out:
2175 :
2176 2098 : return ret;
2177 : }
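/*
 * The dirty-qgroup flush above uses a classic pattern: pop the head
 * element while holding the spinlock, drop the lock for the slow item
 * update, then retake it before looking at the list again. Unlinking
 * before the lock drop is what keeps the iteration safe against
 * concurrent list mutation. A hedged userspace sketch, with a pthread
 * mutex and a singly linked list standing in for the kernel structures:
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct node { int id; struct node *next; };

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *dirty_head;

static void flush_dirty(void)
{
	pthread_mutex_lock(&lock);
	while (dirty_head) {
		struct node *n = dirty_head;

		dirty_head = n->next;		/* unlink under the lock */
		pthread_mutex_unlock(&lock);
		printf("writing qgroup item %d\n", n->id); /* slow, unlocked */
		free(n);
		pthread_mutex_lock(&lock);	/* retake before re-checking */
	}
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	for (int i = 0; i < 3; i++) {
		struct node *n = malloc(sizeof(*n));
		n->id = i;
		n->next = dirty_head;
		dirty_head = n;
	}
	flush_dirty();
	return 0;
}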
2178 :
2179 : /*
2180 : * copy the accounting information between qgroups. This is necessary
2181 : * when a snapshot or a subvolume is created.
2182 : */
2183 195 : int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
2184 11 : struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
2185 : struct btrfs_qgroup_inherit *inherit)
2186 : {
2187 : int ret = 0;
2188 : int i;
2189 : u64 *i_qgroups;
2190 195 : struct btrfs_root *quota_root = fs_info->quota_root;
2191 : struct btrfs_qgroup *srcgroup;
2192 : struct btrfs_qgroup *dstgroup;
2193 : u32 level_size = 0;
2194 : u64 nums;
2195 :
2196 195 : mutex_lock(&fs_info->qgroup_ioctl_lock);
2197 195 : if (!fs_info->quota_enabled)
2198 : goto out;
2199 :
2200 11 : if (!quota_root) {
2201 : ret = -EINVAL;
2202 : goto out;
2203 : }
2204 :
2205 11 : if (inherit) {
2206 10 : i_qgroups = (u64 *)(inherit + 1);
2207 20 : nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
2208 10 : 2 * inherit->num_excl_copies;
2209 20 : for (i = 0; i < nums; ++i) {
2210 10 : srcgroup = find_qgroup_rb(fs_info, *i_qgroups);
2211 10 : if (!srcgroup) {
2212 : ret = -EINVAL;
2213 : goto out;
2214 : }
2215 10 : ++i_qgroups;
2216 : }
2217 : }
2218 :
2219 : /*
2220 : * create a tracking group for the subvol itself
2221 : */
2222 11 : ret = add_qgroup_item(trans, quota_root, objectid);
2223 11 : if (ret)
2224 : goto out;
2225 :
2226 11 : if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) {
2227 0 : ret = update_qgroup_limit_item(trans, quota_root, objectid,
2228 : inherit->lim.flags,
2229 : inherit->lim.max_rfer,
2230 : inherit->lim.max_excl,
2231 : inherit->lim.rsv_rfer,
2232 : inherit->lim.rsv_excl);
2233 0 : if (ret)
2234 : goto out;
2235 : }
2236 :
2237 11 : if (srcid) {
2238 1 : struct btrfs_root *srcroot;
2239 : struct btrfs_key srckey;
2240 : int srcroot_level;
2241 :
2242 1 : srckey.objectid = srcid;
2243 1 : srckey.type = BTRFS_ROOT_ITEM_KEY;
2244 1 : srckey.offset = (u64)-1;
2245 : srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey);
2246 1 : if (IS_ERR(srcroot)) {
2247 0 : ret = PTR_ERR(srcroot);
2248 0 : goto out;
2249 : }
2250 :
2251 : rcu_read_lock();
2252 1 : srcroot_level = btrfs_header_level(srcroot->node);
2253 : level_size = btrfs_level_size(srcroot, srcroot_level);
2254 : rcu_read_unlock();
2255 : }
2256 :
2257 : /*
2258 : * add qgroup to all inherited groups
2259 : */
2260 11 : if (inherit) {
2261 10 : i_qgroups = (u64 *)(inherit + 1);
2262 20 : for (i = 0; i < inherit->num_qgroups; ++i) {
2263 10 : ret = add_qgroup_relation_item(trans, quota_root,
2264 : objectid, *i_qgroups);
2265 10 : if (ret)
2266 : goto out;
2267 10 : ret = add_qgroup_relation_item(trans, quota_root,
2268 : *i_qgroups, objectid);
2269 10 : if (ret)
2270 : goto out;
2271 10 : ++i_qgroups;
2272 : }
2273 : }
2274 :
2275 :
2276 : spin_lock(&fs_info->qgroup_lock);
2277 :
2278 11 : dstgroup = add_qgroup_rb(fs_info, objectid);
2279 11 : if (IS_ERR(dstgroup)) {
2280 0 : ret = PTR_ERR(dstgroup);
2281 0 : goto unlock;
2282 : }
2283 :
2284 11 : if (srcid) {
2285 : srcgroup = find_qgroup_rb(fs_info, srcid);
2286 1 : if (!srcgroup)
2287 : goto unlock;
2288 :
2289 : /*
2290 : * We call inherit after we clone the root in order to make sure
2291 : * our counts don't go crazy, so at this point the only
2292 : * difference between the two roots should be the root node.
2293 : */
2294 1 : dstgroup->rfer = srcgroup->rfer;
2295 1 : dstgroup->rfer_cmpr = srcgroup->rfer_cmpr;
2296 1 : dstgroup->excl = level_size;
2297 1 : dstgroup->excl_cmpr = level_size;
2298 1 : srcgroup->excl = level_size;
2299 1 : srcgroup->excl_cmpr = level_size;
2300 : qgroup_dirty(fs_info, dstgroup);
2301 : qgroup_dirty(fs_info, srcgroup);
2302 : }
2303 :
2304 11 : if (!inherit)
2305 : goto unlock;
2306 :
2307 10 : i_qgroups = (u64 *)(inherit + 1);
2308 20 : for (i = 0; i < inherit->num_qgroups; ++i) {
2309 10 : ret = add_relation_rb(quota_root->fs_info, objectid,
2310 : *i_qgroups);
2311 10 : if (ret)
2312 : goto unlock;
2313 10 : ++i_qgroups;
2314 : }
2315 :
2316 0 : for (i = 0; i < inherit->num_ref_copies; ++i) {
2317 : struct btrfs_qgroup *src;
2318 : struct btrfs_qgroup *dst;
2319 :
2320 0 : src = find_qgroup_rb(fs_info, i_qgroups[0]);
2321 0 : dst = find_qgroup_rb(fs_info, i_qgroups[1]);
2322 :
2323 0 : if (!src || !dst) {
2324 : ret = -EINVAL;
2325 : goto unlock;
2326 : }
2327 :
2328 0 : dst->rfer = src->rfer - level_size;
2329 0 : dst->rfer_cmpr = src->rfer_cmpr - level_size;
2330 0 : i_qgroups += 2;
2331 : }
2332 0 : for (i = 0; i < inherit->num_excl_copies; ++i) {
2333 : struct btrfs_qgroup *src;
2334 : struct btrfs_qgroup *dst;
2335 :
2336 0 : src = find_qgroup_rb(fs_info, i_qgroups[0]);
2337 0 : dst = find_qgroup_rb(fs_info, i_qgroups[1]);
2338 :
2339 0 : if (!src || !dst) {
2340 : ret = -EINVAL;
2341 : goto unlock;
2342 : }
2343 :
2344 0 : dst->excl = src->excl + level_size;
2345 0 : dst->excl_cmpr = src->excl_cmpr + level_size;
2346 0 : i_qgroups += 2;
2347 : }
2348 :
2349 : unlock:
2350 : spin_unlock(&fs_info->qgroup_lock);
2351 : out:
2352 195 : mutex_unlock(&fs_info->qgroup_ioctl_lock);
2353 195 : return ret;
2354 : }
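/*
 * The (u64 *)(inherit + 1) idiom above reads a flat u64 array that the
 * ioctl caller appends directly after the fixed header: num_qgroups
 * single ids first, then num_ref_copies (src, dst) pairs, then
 * num_excl_copies pairs -- which is why nums counts the pairs twice.
 * A hedged userspace sketch of that layout; the header struct below is a
 * simplified stand-in, not the real btrfs_qgroup_inherit definition.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct inherit_hdr {
	uint64_t num_qgroups;
	uint64_t num_ref_copies;
	uint64_t num_excl_copies;
};

int main(void)
{
	uint64_t payload[] = { 1001, 7, 8, 9, 10 }; /* 1 id + 2 pairs */
	struct inherit_hdr *h = malloc(sizeof(*h) + sizeof(payload));
	uint64_t *p;

	h->num_qgroups = 1;
	h->num_ref_copies = 1;
	h->num_excl_copies = 1;
	memcpy(h + 1, payload, sizeof(payload));

	p = (uint64_t *)(h + 1);	/* same idiom as (inherit + 1) */
	for (uint64_t i = 0; i < h->num_qgroups; i++)
		printf("inherit into qgroup %llu\n", (unsigned long long)*p++);
	for (uint64_t i = 0; i < h->num_ref_copies; i++, p += 2)
		printf("copy rfer %llu -> %llu\n",
		       (unsigned long long)p[0], (unsigned long long)p[1]);
	for (uint64_t i = 0; i < h->num_excl_copies; i++, p += 2)
		printf("copy excl %llu -> %llu\n",
		       (unsigned long long)p[0], (unsigned long long)p[1]);
	free(h);
	return 0;
}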
2355 :
2356 : /*
2357 : * reserve some space for a qgroup and all its parents. The reservation takes
2358 : * place with start_transaction or dealloc_reserve, similar to ENOSPC
2359 : * accounting. If not enough space is available, -EDQUOT is returned.
2360 : * We assume that the requested space is new for all qgroups.
2361 : */
2362 11072 : int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
2363 : {
2364 : struct btrfs_root *quota_root;
2365 : struct btrfs_qgroup *qgroup;
2366 22150 : struct btrfs_fs_info *fs_info = root->fs_info;
2367 11072 : u64 ref_root = root->root_key.objectid;
2368 : int ret = 0;
2369 : struct ulist_node *unode;
2370 : struct ulist_iterator uiter;
2371 :
2372 11072 : if (!is_fstree(ref_root))
2373 : return 0;
2374 :
2375 11072 : if (num_bytes == 0)
2376 : return 0;
2377 :
2378 : spin_lock(&fs_info->qgroup_lock);
2379 11078 : quota_root = fs_info->quota_root;
2380 11078 : if (!quota_root)
2381 : goto out;
2382 :
2383 : qgroup = find_qgroup_rb(fs_info, ref_root);
2384 11078 : if (!qgroup)
2385 : goto out;
2386 :
2387 : /*
2388 : * as a first step, we check all affected qgroups to see whether any
2389 : * limits would be exceeded
2390 : */
2391 11078 : ulist_reinit(fs_info->qgroup_ulist);
2392 11078 : ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
2393 : (uintptr_t)qgroup, GFP_ATOMIC);
2394 11078 : if (ret < 0)
2395 : goto out;
2396 11078 : ULIST_ITER_INIT(&uiter);
2397 37002 : while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
2398 : struct btrfs_qgroup *qg;
2399 : struct btrfs_qgroup_list *glist;
2400 :
2401 14858 : qg = u64_to_ptr(unode->aux);
2402 :
2403 18724 : if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
2404 3866 : qg->reserved + (s64)qg->rfer + num_bytes >
2405 3866 : qg->max_rfer) {
2406 : ret = -EDQUOT;
2407 : goto out;
2408 : }
2409 :
2410 14846 : if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
2411 0 : qg->reserved + (s64)qg->excl + num_bytes >
2412 0 : qg->max_excl) {
2413 : ret = -EDQUOT;
2414 : goto out;
2415 : }
2416 :
2417 18626 : list_for_each_entry(glist, &qg->groups, next_group) {
2418 3780 : ret = ulist_add(fs_info->qgroup_ulist,
2419 : glist->group->qgroupid,
2420 3780 : (uintptr_t)glist->group, GFP_ATOMIC);
2421 3780 : if (ret < 0)
2422 : goto out;
2423 : }
2424 : }
2425 : ret = 0;
2426 : /*
2427 : * no limits exceeded, now record the reservation into all qgroups
2428 : */
2429 11066 : ULIST_ITER_INIT(&uiter);
2430 36968 : while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
2431 : struct btrfs_qgroup *qg;
2432 :
2433 14836 : qg = u64_to_ptr(unode->aux);
2434 :
2435 14836 : qg->reserved += num_bytes;
2436 : }
2437 :
2438 : out:
2439 : spin_unlock(&fs_info->qgroup_lock);
2440 11078 : return ret;
2441 : }
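/*
 * The reservation above is deliberately two-pass: first verify that no
 * affected group would exceed a limit, and only if every check passes
 * walk the same set again to record the reservation, so a failure leaves
 * no partial state behind. A hedged sketch of that check-then-commit
 * shape, with a flat array as an assumed stand-in for the qgroup ulist:
 */
#include <errno.h>
#include <stdio.h>

struct group { long long reserved, used, max; };

static int reserve(struct group *g, int n, long long bytes)
{
	/* pass 1: fail without side effects if any group would overflow */
	for (int i = 0; i < n; i++)
		if (g[i].max && g[i].reserved + g[i].used + bytes > g[i].max)
			return -EDQUOT;
	/* pass 2: no limit exceeded, commit the reservation everywhere */
	for (int i = 0; i < n; i++)
		g[i].reserved += bytes;
	return 0;
}

int main(void)
{
	struct group g[2] = { { 0, 100, 4096 }, { 0, 100, 0 /* no limit */ } };

	printf("reserve 1000: %d\n", reserve(g, 2, 1000)); /* 0, succeeds */
	printf("reserve 4000: %d\n", reserve(g, 2, 4000)); /* -EDQUOT */
	printf("g0 reserved=%lld\n", g[0].reserved);       /* still 1000 */
	return 0;
}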
2442 :
2443 14559 : void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
2444 : {
2445 : struct btrfs_root *quota_root;
2446 : struct btrfs_qgroup *qgroup;
2447 27812 : struct btrfs_fs_info *fs_info = root->fs_info;
2448 : struct ulist_node *unode;
2449 : struct ulist_iterator uiter;
2450 14559 : u64 ref_root = root->root_key.objectid;
2451 : int ret = 0;
2452 :
2453 14559 : if (!is_fstree(ref_root))
2454 1309 : return;
2455 :
2456 14558 : if (num_bytes == 0)
2457 : return;
2458 :
2459 : spin_lock(&fs_info->qgroup_lock);
2460 :
2461 13253 : quota_root = fs_info->quota_root;
2462 13253 : if (!quota_root)
2463 : goto out;
2464 :
2465 : qgroup = find_qgroup_rb(fs_info, ref_root);
2466 13253 : if (!qgroup)
2467 : goto out;
2468 :
2469 13253 : ulist_reinit(fs_info->qgroup_ulist);
2470 13253 : ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
2471 : (uintptr_t)qgroup, GFP_ATOMIC);
2472 13253 : if (ret < 0)
2473 : goto out;
2474 13253 : ULIST_ITER_INIT(&uiter);
2475 45980 : while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
2476 : struct btrfs_qgroup *qg;
2477 : struct btrfs_qgroup_list *glist;
2478 :
2479 19474 : qg = u64_to_ptr(unode->aux);
2480 :
2481 19474 : qg->reserved -= num_bytes;
2482 :
2483 25695 : list_for_each_entry(glist, &qg->groups, next_group) {
2484 6221 : ret = ulist_add(fs_info->qgroup_ulist,
2485 : glist->group->qgroupid,
2486 6221 : (uintptr_t)glist->group, GFP_ATOMIC);
2487 6221 : if (ret < 0)
2488 : goto out;
2489 : }
2490 : }
2491 :
2492 : out:
2493 : spin_unlock(&fs_info->qgroup_lock);
2494 : }
2495 :
2496 222008 : void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
2497 : {
2498 444016 : if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq)
2499 222008 : return;
2500 0 : btrfs_err(trans->root->fs_info,
2501 : "qgroups not uptodate in trans handle %p: list is%s empty, "
2502 : "seq is %#x.%x",
2503 : trans, list_empty(&trans->qgroup_ref_list) ? "" : " not",
2504 : (u32)(trans->delayed_ref_elem.seq >> 32),
2505 : (u32)trans->delayed_ref_elem.seq);
2506 0 : BUG();
2507 : }
2508 :
2509 : /*
2510 : * returns < 0 on error, 0 when more leaves are to be scanned.
2511 : * returns 1 when done, 2 when done and FLAG_INCONSISTENT was cleared.
2512 : */
2513 : static int
2514 71 : qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
2515 : struct btrfs_trans_handle *trans, struct ulist *qgroups,
2516 2794 : struct ulist *tmp, struct extent_buffer *scratch_leaf)
2517 : {
2518 : struct btrfs_key found;
2519 71 : struct ulist *roots = NULL;
2520 71 : struct seq_list tree_mod_seq_elem = {};
2521 : u64 num_bytes;
2522 : u64 seq;
2523 : int new_roots;
2524 : int slot;
2525 : int ret;
2526 :
2527 71 : path->leave_spinning = 1;
2528 71 : mutex_lock(&fs_info->qgroup_rescan_lock);
2529 71 : ret = btrfs_search_slot_for_read(fs_info->extent_root,
2530 : &fs_info->qgroup_rescan_progress,
2531 : path, 1, 0);
2532 :
2533 71 : pr_debug("current progress key (%llu %u %llu), search_slot ret %d\n",
2534 : fs_info->qgroup_rescan_progress.objectid,
2535 : fs_info->qgroup_rescan_progress.type,
2536 : fs_info->qgroup_rescan_progress.offset, ret);
2537 :
2538 71 : if (ret) {
2539 : /*
2540 : * The rescan is about to end, we will not be scanning any
2541 : * further blocks. We cannot unset the RESCAN flag here, because
2542 : * we want to commit the transaction if everything went well.
2543 : * To make the live accounting work in this phase, we set our
2544 : * scan progress pointer such that every real extent objectid
2545 : * will be smaller.
2546 : */
2547 7 : fs_info->qgroup_rescan_progress.objectid = (u64)-1;
2548 7 : btrfs_release_path(path);
2549 7 : mutex_unlock(&fs_info->qgroup_rescan_lock);
2550 : return ret;
2551 : }
2552 :
2553 64 : btrfs_item_key_to_cpu(path->nodes[0], &found,
2554 128 : btrfs_header_nritems(path->nodes[0]) - 1);
2555 64 : fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;
2556 :
2557 64 : btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
2558 64 : memcpy(scratch_leaf, path->nodes[0], sizeof(*scratch_leaf));
2559 64 : slot = path->slots[0];
2560 64 : btrfs_release_path(path);
2561 64 : mutex_unlock(&fs_info->qgroup_rescan_lock);
2562 :
2563 5524 : for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
2564 2730 : btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
2565 2730 : if (found.type != BTRFS_EXTENT_ITEM_KEY &&
2566 : found.type != BTRFS_METADATA_ITEM_KEY)
2567 36 : continue;
2568 2694 : if (found.type == BTRFS_METADATA_ITEM_KEY)
2569 0 : num_bytes = fs_info->extent_root->leafsize;
2570 : else
2571 2694 : num_bytes = found.offset;
2572 :
2573 2694 : ulist_reinit(qgroups);
2574 2694 : ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0,
2575 : &roots);
2576 2694 : if (ret < 0)
2577 : goto out;
2578 : spin_lock(&fs_info->qgroup_lock);
2579 2694 : seq = fs_info->qgroup_seq;
2580 2694 : fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
2581 :
2582 2694 : new_roots = 0;
2583 2694 : ret = qgroup_calc_old_refcnt(fs_info, 0, tmp, roots, qgroups,
2584 : seq, &new_roots, 1);
2585 2694 : if (ret < 0) {
2586 : spin_unlock(&fs_info->qgroup_lock);
2587 0 : ulist_free(roots);
2588 : goto out;
2589 : }
2590 :
2591 2694 : ret = qgroup_adjust_counters(fs_info, 0, num_bytes, qgroups,
2592 : seq, 0, new_roots, 1);
2593 2694 : if (ret < 0) {
2594 : spin_unlock(&fs_info->qgroup_lock);
2595 0 : ulist_free(roots);
2596 : goto out;
2597 : }
2598 : spin_unlock(&fs_info->qgroup_lock);
2599 2694 : ulist_free(roots);
2600 : }
2601 : out:
2602 64 : btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
2603 :
2604 : return ret;
2605 : }
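/*
 * qgroup_rescan_leaf copies the leaf into scratch_leaf and releases the
 * path before doing the per-item accounting, so the expensive work runs
 * without tree locks held. The snapshot-then-process shape looks like
 * this hedged userspace sketch (buffer and mutex are assumed stand-ins
 * for the extent buffer and the locking around it):
 */
#include <pthread.h>
#include <stdio.h>
#include <string.h>

#define NITEMS 4

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int shared_leaf[NITEMS] = { 10, 20, 30, 40 };

static void scan_leaf(void)
{
	int scratch[NITEMS];

	pthread_mutex_lock(&lock);
	memcpy(scratch, shared_leaf, sizeof(scratch)); /* snapshot, locked */
	pthread_mutex_unlock(&lock);

	/* process the private copy; writers can no longer tear it */
	for (int i = 0; i < NITEMS; i++)
		printf("item %d: %d\n", i, scratch[i]);
}

int main(void)
{
	scan_leaf();
	return 0;
}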
2606 :
2607 7 : static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
2608 : {
2609 7 : struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info,
2610 : qgroup_rescan_work);
2611 : struct btrfs_path *path;
2612 : struct btrfs_trans_handle *trans = NULL;
2613 : struct ulist *tmp = NULL, *qgroups = NULL;
2614 : struct extent_buffer *scratch_leaf = NULL;
2615 : int err = -ENOMEM;
2616 :
2617 7 : path = btrfs_alloc_path();
2618 7 : if (!path)
2619 : goto out;
2620 7 : qgroups = ulist_alloc(GFP_NOFS);
2621 7 : if (!qgroups)
2622 : goto out;
2623 7 : tmp = ulist_alloc(GFP_NOFS);
2624 7 : if (!tmp)
2625 : goto out;
2626 : scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS);
2627 7 : if (!scratch_leaf)
2628 : goto out;
2629 :
2630 : err = 0;
2631 78 : while (!err) {
2632 71 : trans = btrfs_start_transaction(fs_info->fs_root, 0);
2633 71 : if (IS_ERR(trans)) {
2634 0 : err = PTR_ERR(trans);
2635 0 : break;
2636 : }
2637 71 : if (!fs_info->quota_enabled) {
2638 : err = -EINTR;
2639 : } else {
2640 71 : err = qgroup_rescan_leaf(fs_info, path, trans,
2641 : qgroups, tmp, scratch_leaf);
2642 : }
2643 71 : if (err > 0)
2644 7 : btrfs_commit_transaction(trans, fs_info->fs_root);
2645 : else
2646 64 : btrfs_end_transaction(trans, fs_info->fs_root);
2647 : }
2648 :
2649 : out:
2650 7 : kfree(scratch_leaf);
2651 7 : ulist_free(qgroups);
2652 7 : ulist_free(tmp);
2653 7 : btrfs_free_path(path);
2654 :
2655 7 : mutex_lock(&fs_info->qgroup_rescan_lock);
2656 7 : fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2657 :
2658 7 : if (err == 2 &&
2659 0 : fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
2660 0 : fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2661 7 : } else if (err < 0) {
2662 0 : fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2663 : }
2664 7 : mutex_unlock(&fs_info->qgroup_rescan_lock);
2665 :
2666 7 : if (err >= 0) {
2667 7 : btrfs_info(fs_info, "qgroup scan completed%s",
2668 : err == 2 ? " (inconsistency flag cleared)" : "");
2669 : } else {
2670 0 : btrfs_err(fs_info, "qgroup scan failed with %d", err);
2671 : }
2672 :
2673 7 : complete_all(&fs_info->qgroup_rescan_completion);
2674 7 : }
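/*
 * The worker loop above does one bounded unit of work per transaction and
 * relies on the qgroup_rescan_leaf return protocol (< 0 error, 0 more to
 * scan, > 0 done). A hedged sketch of that chunked loop; scan_one_chunk
 * and the cursor are illustrative assumptions, and the transaction
 * start/commit calls are elided to comments:
 */
#include <stdio.h>

static int scan_one_chunk(int *cursor)
{
	if (*cursor >= 3)
		return 1;			/* done: nothing left */
	printf("scanning chunk %d\n", (*cursor)++);
	return 0;				/* more chunks remain */
}

int main(void)
{
	int cursor = 0, err = 0;

	while (!err) {
		/* btrfs_start_transaction() would go here */
		err = scan_one_chunk(&cursor);
		/* commit on completion (err > 0), otherwise just end */
	}
	printf("scan %s\n", err > 0 ? "completed" : "failed");
	return 0;
}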
2675 :
2676 : /*
2677 : * Checks that (a) no rescan is running and (b) quota is enabled. Allocates all
2678 : * memory required for the rescan context.
2679 : */
2680 : static int
2681 8 : qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
2682 : int init_flags)
2683 : {
2684 : int ret = 0;
2685 :
2686 8 : if (!init_flags &&
2687 0 : (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) ||
2688 : !(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))) {
2689 : ret = -EINVAL;
2690 : goto err;
2691 : }
2692 :
2693 8 : mutex_lock(&fs_info->qgroup_rescan_lock);
2694 : spin_lock(&fs_info->qgroup_lock);
2695 :
2696 8 : if (init_flags) {
2697 8 : if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
2698 : ret = -EINPROGRESS;
2699 7 : else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
2700 : ret = -EINVAL;
2701 :
2702 8 : if (ret) {
2703 : spin_unlock(&fs_info->qgroup_lock);
2704 1 : mutex_unlock(&fs_info->qgroup_rescan_lock);
2705 1 : goto err;
2706 : }
2707 :
2708 7 : fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2709 : }
2710 :
2711 7 : memset(&fs_info->qgroup_rescan_progress, 0,
2712 : sizeof(fs_info->qgroup_rescan_progress));
2713 7 : fs_info->qgroup_rescan_progress.objectid = progress_objectid;
2714 :
2715 : spin_unlock(&fs_info->qgroup_lock);
2716 7 : mutex_unlock(&fs_info->qgroup_rescan_lock);
2717 :
2718 : init_completion(&fs_info->qgroup_rescan_completion);
2719 :
2720 7 : memset(&fs_info->qgroup_rescan_work, 0,
2721 : sizeof(fs_info->qgroup_rescan_work));
2722 7 : btrfs_init_work(&fs_info->qgroup_rescan_work,
2723 : btrfs_qgroup_rescan_helper,
2724 : btrfs_qgroup_rescan_worker, NULL, NULL);
2725 :
2726 7 : if (ret) {
2727 : err:
2728 1 : btrfs_info(fs_info, "qgroup_rescan_init failed with %d", ret);
2729 1 : return ret;
2730 : }
2731 :
2732 : return 0;
2733 : }
2734 :
2735 : static void
2736 7 : qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info)
2737 : {
2738 : struct rb_node *n;
2739 : struct btrfs_qgroup *qgroup;
2740 :
2741 : spin_lock(&fs_info->qgroup_lock);
2742 : /* clear all current qgroup tracking information */
2743 20 : for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) {
2744 : qgroup = rb_entry(n, struct btrfs_qgroup, node);
2745 13 : qgroup->rfer = 0;
2746 13 : qgroup->rfer_cmpr = 0;
2747 13 : qgroup->excl = 0;
2748 13 : qgroup->excl_cmpr = 0;
2749 : }
2750 : spin_unlock(&fs_info->qgroup_lock);
2751 7 : }
2752 :
2753 : int
2754 2 : btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
2755 : {
2756 : int ret = 0;
2757 : struct btrfs_trans_handle *trans;
2758 :
2759 2 : ret = qgroup_rescan_init(fs_info, 0, 1);
2760 2 : if (ret)
2761 : return ret;
2762 :
2763 : /*
2764 : * We have set the rescan_progress to 0, which means no more
2765 : * delayed refs will be accounted by btrfs_qgroup_account.
2766 : * However, btrfs_qgroup_account may already be past its call
2767 : * to btrfs_find_all_roots, in which case it would still do the
2768 : * accounting.
2769 : * To solve this, we're committing the transaction, which will
2770 : * ensure we run all delayed refs and only after that, we are
2771 : * going to clear all tracking information for a clean start.
2772 : */
2773 :
2774 1 : trans = btrfs_join_transaction(fs_info->fs_root);
2775 1 : if (IS_ERR(trans)) {
2776 0 : fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2777 0 : return PTR_ERR(trans);
2778 : }
2779 1 : ret = btrfs_commit_transaction(trans, fs_info->fs_root);
2780 1 : if (ret) {
2781 0 : fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2782 0 : return ret;
2783 : }
2784 :
2785 1 : qgroup_rescan_zero_tracking(fs_info);
2786 :
2787 1 : btrfs_queue_work(fs_info->qgroup_rescan_workers,
2788 : &fs_info->qgroup_rescan_work);
2789 :
2790 1 : return 0;
2791 : }
2792 :
2793 2 : int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info)
2794 : {
2795 : int running;
2796 : int ret = 0;
2797 :
2798 2 : mutex_lock(&fs_info->qgroup_rescan_lock);
2799 : spin_lock(&fs_info->qgroup_lock);
2800 2 : running = fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2801 : spin_unlock(&fs_info->qgroup_lock);
2802 2 : mutex_unlock(&fs_info->qgroup_rescan_lock);
2803 :
2804 2 : if (running)
2805 2 : ret = wait_for_completion_interruptible(
2806 : &fs_info->qgroup_rescan_completion);
2807 :
2808 2 : return ret;
2809 : }
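/*
 * btrfs_qgroup_wait_for_completion samples the RESCAN flag under the
 * locks, then blocks on the completion outside of them so the worker can
 * take the same locks to finish up. A hedged userspace model with a
 * condition variable standing in for the kernel completion; all names
 * here are illustrative:
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t done_cv = PTHREAD_COND_INITIALIZER;
static int running = 1, done;

static void *rescan_worker(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	running = 0;
	done = 1;
	pthread_cond_broadcast(&done_cv);	/* complete_all() analogue */
	pthread_mutex_unlock(&lock);
	return NULL;
}

static void wait_for_rescan(void)
{
	int was_running;

	pthread_mutex_lock(&lock);
	was_running = running;		/* sample the flag under the lock */
	pthread_mutex_unlock(&lock);

	if (was_running) {		/* block only if a rescan was seen */
		pthread_mutex_lock(&lock);
		while (!done)
			pthread_cond_wait(&done_cv, &lock);
		pthread_mutex_unlock(&lock);
	}
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, rescan_worker, NULL);
	wait_for_rescan();
	pthread_join(&t, NULL);
	printf("rescan finished\n");
	return 0;
}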
2810 :
2811 : /*
2812 : * this is only called from open_ctree where we're still single-threaded, thus
2813 : * locking is omitted here.
2814 : */
2815 : void
2816 192 : btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info)
2817 : {
2818 192 : if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
2819 0 : btrfs_queue_work(fs_info->qgroup_rescan_workers,
2820 : &fs_info->qgroup_rescan_work);
2821 192 : }
|