diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 6934a5b8708fed..66fa39632cdea7 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1185,21 +1185,8 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force) struct btrfs_space_info *sinfo = cache->space_info; u64 num_bytes; u64 sinfo_used; - u64 min_allocable_bytes; int ret = -ENOSPC; - /* - * We need some metadata space and system metadata space for - * allocating chunks in some corner cases until we force to set - * it to be readonly. - */ - if ((sinfo->flags & - (BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_METADATA)) && - !force) - min_allocable_bytes = SZ_1M; - else - min_allocable_bytes = 0; - spin_lock(&sinfo->lock); spin_lock(&cache->lock); @@ -1217,10 +1204,9 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force) * sinfo_used + num_bytes should always <= sinfo->total_bytes. * * Here we make sure if we mark this bg RO, we still have enough - * free space as buffer (if min_allocable_bytes is not 0). + * free space as buffer. */ - if (sinfo_used + num_bytes + min_allocable_bytes <= - sinfo->total_bytes) { + if (sinfo_used + num_bytes <= sinfo->total_bytes) { sinfo->bytes_readonly += num_bytes; cache->ro++; list_add_tail(&cache->ro_list, &sinfo->ro_bgs); @@ -1233,8 +1219,8 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force) btrfs_info(cache->fs_info, "unable to make block group %llu ro", cache->start); btrfs_info(cache->fs_info, - "sinfo_used=%llu bg_num_bytes=%llu min_allocable=%llu", - sinfo_used, num_bytes, min_allocable_bytes); + "sinfo_used=%llu bg_num_bytes=%llu", + sinfo_used, num_bytes); btrfs_dump_space_info(cache->fs_info, cache->space_info, 0, 0); } return ret; diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 0b52ab4cb96490..a0ce69f2d27cd9 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -629,7 +629,6 @@ static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(dev_t dev, static int btrfsic_process_superblock(struct btrfsic_state *state, struct btrfs_fs_devices *fs_devices) { - struct btrfs_fs_info *fs_info = state->fs_info; struct btrfs_super_block *selected_super; struct list_head *dev_head = &fs_devices->devices; struct btrfs_device *device; @@ -637,7 +636,6 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, int ret = 0; int pass; - BUG_ON(NULL == state); selected_super = kzalloc(sizeof(*selected_super), GFP_NOFS); if (NULL == selected_super) { pr_info("btrfsic: error, kmalloc failed!\n"); @@ -700,7 +698,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, break; } - num_copies = btrfs_num_copies(fs_info, next_bytenr, + num_copies = btrfs_num_copies(state->fs_info, next_bytenr, state->metablock_size); if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) pr_info("num_copies(log_bytenr=%llu) = %d\n", diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index ee834ef7beb4a3..3663c3128f1ed5 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -447,7 +447,7 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start, if (blkcg_css) { bio->bi_opf |= REQ_CGROUP_PUNT; - bio_associate_blkg_from_css(bio, blkcg_css); + kthread_associate_blkcg(blkcg_css); } refcount_set(&cb->pending_bios, 1); @@ -491,6 +491,8 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start, bio->bi_opf = REQ_OP_WRITE | write_flags; bio->bi_private = cb; bio->bi_end_io = end_compressed_bio_write; + if (blkcg_css) + bio->bi_opf |= REQ_CGROUP_PUNT; bio_add_page(bio, page, PAGE_SIZE, 0); } if (bytes_left < PAGE_SIZE) { @@ -517,6 +519,9 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start, bio_endio(bio); } + if (blkcg_css) + kthread_associate_blkcg(NULL); + return 0; } @@ -758,7 +763,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) { ret = btrfs_lookup_bio_sums(inode, comp_bio, - sums); + (u64)-1, sums); BUG_ON(ret); /* -ENOMEM */ } @@ -786,7 +791,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, BUG_ON(ret); /* -ENOMEM */ if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) { - ret = btrfs_lookup_bio_sums(inode, comp_bio, sums); + ret = btrfs_lookup_bio_sums(inode, comp_bio, (u64)-1, sums); BUG_ON(ret); /* -ENOMEM */ } @@ -1196,7 +1201,7 @@ static int btrfs_decompress_bio(struct compressed_bio *cb) { struct list_head *workspace; int ret; - int type = cb->compress_type; + int type = cb->compress_type & 0xF; workspace = get_workspace(type, 0); ret = compression_decompress_bio(type, workspace, cb); @@ -1215,7 +1220,7 @@ int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page, { struct list_head *workspace; int ret; - + type = type & 0xF; workspace = get_workspace(type, 0); ret = compression_decompress(type, workspace, data_in, dest_page, start_byte, srclen, destlen); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 54efb21c272726..e416ef6c9415a7 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2449,8 +2449,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref); int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len, int delalloc); -int btrfs_free_and_pin_reserved_extent(struct btrfs_fs_info *fs_info, - u64 start, u64 len); +int btrfs_pin_reserved_extent(struct btrfs_fs_info *fs_info, u64 start, + u64 len); void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info); int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans); int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, @@ -2789,9 +2789,7 @@ struct btrfs_dio_private; int btrfs_del_csums(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 len); blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, - u8 *dst); -blk_status_t btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio, - u64 logical_offset); + u64 offset, u8 *dst); int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 pos, @@ -2877,7 +2875,7 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, struct btrfs_root *root); struct extent_map *btrfs_get_extent(struct btrfs_inode *inode, struct page *page, size_t pg_offset, - u64 start, u64 end, int create); + u64 start, u64 end); int btrfs_update_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e0edfdc9c82bea..881aba162e4e23 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -202,8 +202,8 @@ void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb, * that covers the entire device */ struct extent_map *btree_get_extent(struct btrfs_inode *inode, - struct page *page, size_t pg_offset, u64 start, u64 len, - int create) + struct page *page, size_t pg_offset, + u64 start, u64 len) { struct extent_map_tree *em_tree = &inode->extent_tree; struct extent_map *em; @@ -3082,20 +3082,13 @@ int __cold open_ctree(struct super_block *sb, btrfs_free_extra_devids(fs_devices, 1); - ret = btrfs_sysfs_add_fsid(fs_devices, NULL); + ret = btrfs_sysfs_add_fsid(fs_devices); if (ret) { btrfs_err(fs_info, "failed to init sysfs fsid interface: %d", ret); goto fail_block_groups; } - ret = btrfs_sysfs_add_device(fs_devices); - if (ret) { - btrfs_err(fs_info, "failed to init sysfs device interface: %d", - ret); - goto fail_fsdev_sysfs; - } - ret = btrfs_sysfs_add_mounted(fs_info); if (ret) { btrfs_err(fs_info, "failed to init sysfs interface: %d", ret); diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 76f123ebb29217..8c2d6cf1ce5969 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -134,8 +134,8 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, int btree_lock_page_hook(struct page *page, void *data, void (*flush_fn)(void *)); struct extent_map *btree_get_extent(struct btrfs_inode *inode, - struct page *page, size_t pg_offset, u64 start, u64 len, - int create); + struct page *page, size_t pg_offset, + u64 start, u64 len); int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags); int __init btrfs_end_io_wq_init(void); void __cold btrfs_end_io_wq_exit(void); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 274318e9114eee..180b9b81d01aa0 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3438,7 +3438,6 @@ btrfs_release_block_group(struct btrfs_block_group *cache, */ struct find_free_extent_ctl { /* Basic allocation info */ - u64 ram_bytes; u64 num_bytes; u64 empty_size; u64 flags; @@ -3810,7 +3809,6 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info, WARN_ON(num_bytes < fs_info->sectorsize); - ffe_ctl.ram_bytes = ram_bytes; ffe_ctl.num_bytes = num_bytes; ffe_ctl.empty_size = empty_size; ffe_ctl.flags = flags; @@ -4165,12 +4163,10 @@ int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, return ret; } -static int __btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info, - u64 start, u64 len, - int pin, int delalloc) +int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info, + u64 start, u64 len, int delalloc) { struct btrfs_block_group *cache; - int ret = 0; cache = btrfs_lookup_block_group(fs_info, start); if (!cache) { @@ -4179,30 +4175,28 @@ static int __btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info, return -ENOSPC; } - if (pin) - pin_down_extent(cache, start, len, 1); - else { - if (btrfs_test_opt(fs_info, DISCARD)) - ret = btrfs_discard_extent(fs_info, start, len, NULL); - btrfs_add_free_space(cache, start, len); - btrfs_free_reserved_bytes(cache, len, delalloc); - trace_btrfs_reserved_extent_free(fs_info, start, len); - } + btrfs_add_free_space(cache, start, len); + btrfs_free_reserved_bytes(cache, len, delalloc); + trace_btrfs_reserved_extent_free(fs_info, start, len); btrfs_put_block_group(cache); - return ret; + return 0; } -int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info, - u64 start, u64 len, int delalloc) +int btrfs_pin_reserved_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len) { - return __btrfs_free_reserved_extent(fs_info, start, len, 0, delalloc); -} + struct btrfs_block_group *cache; + int ret = 0; -int btrfs_free_and_pin_reserved_extent(struct btrfs_fs_info *fs_info, - u64 start, u64 len) -{ - return __btrfs_free_reserved_extent(fs_info, start, len, 1, 0); + cache = btrfs_lookup_block_group(fs_info, start); + if (!cache) { + btrfs_err(fs_info, "unable to find block group for %llu", start); + return -ENOSPC; + } + + ret = pin_down_extent(cache, start, len, 1); + btrfs_put_block_group(cache); + return ret; } static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 2f4802f405a246..394beb474a693e 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3043,7 +3043,7 @@ __get_extent_map(struct inode *inode, struct page *page, size_t pg_offset, *em_cached = NULL; } - em = get_extent(BTRFS_I(inode), page, pg_offset, start, len, 0); + em = get_extent(BTRFS_I(inode), page, pg_offset, start, len); if (em_cached && !IS_ERR_OR_NULL(em)) { BUG_ON(*em_cached); refcount_inc(&em->refs); @@ -3455,11 +3455,6 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode, update_nr_written(wbc, nr_written + 1); end = page_end; - if (i_size <= start) { - btrfs_writepage_endio_finish_ordered(page, start, page_end, 1); - goto done; - } - blocksize = inode->i_sb->s_blocksize; while (cur <= end) { @@ -3471,8 +3466,8 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode, page_end, 1); break; } - em = btrfs_get_extent(BTRFS_I(inode), page, pg_offset, cur, - end - cur + 1, 1); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur, + end - cur + 1); if (IS_ERR_OR_NULL(em)) { SetPageError(page); ret = PTR_ERR_OR_ZERO(em); @@ -3497,22 +3492,11 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode, */ if (compressed || block_start == EXTENT_MAP_HOLE || block_start == EXTENT_MAP_INLINE) { - /* - * end_io notification does not happen here for - * compressed extents - */ - if (!compressed) - btrfs_writepage_endio_finish_ordered(page, cur, - cur + iosize - 1, - 1); - else if (compressed) { - /* we don't want to end_page_writeback on - * a compressed extent. this happens - * elsewhere - */ + if (compressed) nr++; - } - + else + btrfs_writepage_endio_finish_ordered(page, cur, + cur + iosize - 1, 1); cur += iosize; pg_offset += iosize; continue; @@ -3540,7 +3524,6 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode, pg_offset += iosize; nr++; } -done: *nr_ret = nr; return ret; } @@ -3562,7 +3545,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, u64 page_end = start + PAGE_SIZE - 1; int ret; int nr = 0; - size_t pg_offset = 0; + size_t pg_offset; loff_t i_size = i_size_read(inode); unsigned long end_index = i_size >> PAGE_SHIFT; unsigned long nr_written = 0; @@ -3591,14 +3574,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, flush_dcache_page(page); } - pg_offset = 0; - set_page_extent_mapped(page); if (!epd->extent_locked) { ret = writepage_delalloc(inode, page, wbc, start, &nr_written); if (ret == 1) - goto done_unlocked; + return 0; if (ret) goto done; } @@ -3606,7 +3587,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, ret = __extent_writepage_io(inode, page, wbc, epd, i_size, nr_written, &nr); if (ret == 1) - goto done_unlocked; + return 0; done: if (nr == 0) { @@ -3621,9 +3602,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, unlock_page(page); ASSERT(ret <= 0); return ret; - -done_unlocked: - return 0; } void wait_on_extent_buffer_writeback(struct extent_buffer *eb) diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index a8551a1f56e247..5d205bbaafdcb1 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -183,10 +183,8 @@ static inline int extent_compress_type(unsigned long bio_flags) struct extent_map_tree; typedef struct extent_map *(get_extent_t)(struct btrfs_inode *inode, - struct page *page, - size_t pg_offset, - u64 start, u64 len, - int create); + struct page *page, size_t pg_offset, + u64 start, u64 len); int try_release_extent_mapping(struct page *page, gfp_t mask); int try_release_extent_buffer(struct page *page); diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index b1bfdc5c1387ae..bb374042d29786 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -148,8 +148,19 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, return ret; } -static blk_status_t __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, - u64 logical_offset, u8 *dst, int dio) +/** + * btrfs_lookup_bio_sums - Look up checksums for a bio. + * @inode: inode that the bio is for. + * @bio: bio embedded in btrfs_io_bio. + * @offset: Unless (u64)-1, look up checksums for this offset in the file. + * If (u64)-1, use the page offsets from the bio instead. + * @dst: Buffer of size btrfs_super_csum_size() used to return checksum. If + * NULL, the checksum is returned in btrfs_io_bio(bio)->csum instead. + * + * Return: BLK_STS_RESOURCE if allocating memory fails, BLK_STS_OK otherwise. + */ +blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, + u64 offset, u8 *dst) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct bio_vec bvec; @@ -158,8 +169,8 @@ static blk_status_t __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio struct btrfs_csum_item *item = NULL; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct btrfs_path *path; + const bool page_offsets = (offset == (u64)-1); u8 *csum; - u64 offset = 0; u64 item_start_offset = 0; u64 item_last_offset = 0; u64 disk_bytenr; @@ -205,15 +216,13 @@ static blk_status_t __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio } disk_bytenr = (u64)bio->bi_iter.bi_sector << 9; - if (dio) - offset = logical_offset; bio_for_each_segment(bvec, bio, iter) { page_bytes_left = bvec.bv_len; if (count) goto next; - if (!dio) + if (page_offsets) offset = page_offset(bvec.bv_page) + bvec.bv_offset; count = btrfs_find_ordered_sum(inode, offset, disk_bytenr, csum, nblocks); @@ -285,18 +294,7 @@ static blk_status_t __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio WARN_ON_ONCE(count); btrfs_free_path(path); - return 0; -} - -blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, - u8 *dst) -{ - return __btrfs_lookup_bio_sums(inode, bio, 0, dst, 0); -} - -blk_status_t btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio, u64 offset) -{ - return __btrfs_lookup_bio_sums(inode, bio, offset, NULL, 1); + return BLK_STS_OK; } int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, @@ -483,8 +481,8 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio, - 1); for (i = 0; i < nr_sectors; i++) { - if (offset >= ordered->file_offset + ordered->len || - offset < ordered->file_offset) { + if (offset >= ordered->file_offset + ordered->num_bytes || + offset < ordered->file_offset) { unsigned long bytes_left; sums->len = this_sum_bytes; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 8d47c76b7bd107..a16da274c9aad0 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -477,8 +477,7 @@ static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode, u64 em_len; int ret = 0; - em = btrfs_get_extent(inode, NULL, 0, search_start, - search_len, 0); + em = btrfs_get_extent(inode, NULL, 0, search_start, search_len); if (IS_ERR(em)) return PTR_ERR(em); @@ -1501,7 +1500,7 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages, ordered = btrfs_lookup_ordered_range(inode, start_pos, last_pos - start_pos + 1); if (ordered && - ordered->file_offset + ordered->len > start_pos && + ordered->file_offset + ordered->num_bytes > start_pos && ordered->file_offset <= last_pos) { unlock_extent_cached(&inode->io_tree, start_pos, last_pos, cached_state); @@ -2390,7 +2389,7 @@ static int find_first_non_hole(struct inode *inode, u64 *start, u64 *len) em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, round_down(*start, fs_info->sectorsize), - round_up(*len, fs_info->sectorsize), 0); + round_up(*len, fs_info->sectorsize)); if (IS_ERR(em)) return PTR_ERR(em); @@ -2426,7 +2425,7 @@ static int btrfs_punch_hole_lock_range(struct inode *inode, * we need to try again. */ if ((!ordered || - (ordered->file_offset + ordered->len <= lockstart || + (ordered->file_offset + ordered->num_bytes <= lockstart || ordered->file_offset > lockend)) && !filemap_range_has_page(inode->i_mapping, lockstart, lockend)) { @@ -2957,7 +2956,7 @@ static int btrfs_zero_range_check_range_boundary(struct inode *inode, int ret; offset = round_down(offset, sectorsize); - em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize); if (IS_ERR(em)) return PTR_ERR(em); @@ -2990,8 +2989,8 @@ static int btrfs_zero_range(struct inode *inode, inode_dio_wait(inode); - em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, - alloc_start, alloc_end - alloc_start, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, alloc_start, + alloc_end - alloc_start); if (IS_ERR(em)) { ret = PTR_ERR(em); goto out; @@ -3034,8 +3033,8 @@ static int btrfs_zero_range(struct inode *inode, if (BTRFS_BYTES_TO_BLKS(fs_info, offset) == BTRFS_BYTES_TO_BLKS(fs_info, offset + len - 1)) { - em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, - alloc_start, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, alloc_start, + sectorsize); if (IS_ERR(em)) { ret = PTR_ERR(em); goto out; @@ -3248,7 +3247,7 @@ static long btrfs_fallocate(struct file *file, int mode, ordered = btrfs_lookup_first_ordered_extent(inode, locked_end); if (ordered && - ordered->file_offset + ordered->len > alloc_start && + ordered->file_offset + ordered->num_bytes > alloc_start && ordered->file_offset < alloc_end) { btrfs_put_ordered_extent(ordered); unlock_extent_cached(&BTRFS_I(inode)->io_tree, @@ -3273,7 +3272,7 @@ static long btrfs_fallocate(struct file *file, int mode, INIT_LIST_HEAD(&reserve_list); while (cur_offset < alloc_end) { em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset, - alloc_end - cur_offset, 0); + alloc_end - cur_offset); if (IS_ERR(em)) { ret = PTR_ERR(em); break; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e3c76645cad76e..baf813672257d2 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -44,7 +44,6 @@ #include "locking.h" #include "free-space-cache.h" #include "inode-map.h" -#include "backref.h" #include "props.h" #include "qgroup.h" #include "delalloc-space.h" @@ -64,7 +63,6 @@ struct btrfs_dio_data { static const struct inode_operations btrfs_dir_inode_operations; static const struct inode_operations btrfs_symlink_inode_operations; -static const struct inode_operations btrfs_dir_ro_inode_operations; static const struct inode_operations btrfs_special_inode_operations; static const struct inode_operations btrfs_file_inode_operations; static const struct address_space_operations btrfs_aops; @@ -229,7 +227,7 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, compressed_size -= cur_size; } btrfs_set_file_extent_compression(leaf, ei, - compress_type); + compress_type & 0XF); } else { page = find_get_page(inode->i_mapping, start >> PAGE_SHIFT); @@ -575,14 +573,31 @@ static noinline int compress_file_range(struct async_chunk *async_chunk) } /* Compression level is applied here and only here */ - ret = btrfs_compress_pages( - compress_type | (fs_info->compress_level << 4), - inode->i_mapping, start, - pages, - &nr_pages, - &total_in, - &total_compressed); - + /* + * Check if the upper bits are set, and if so, + * take them as the compression level. + * the inode compression level takes precendence, if set + */ + if ((compress_type & 0xF) == compress_type) { + ret = btrfs_compress_pages( + compress_type | (fs_info->compress_level << 4), + inode->i_mapping, start, + pages, + &nr_pages, + &total_in, + &total_compressed); + } else { + int compress_level = btrfs_compress_set_level( + compress_type & 0xF, + compress_type>>4); + ret = btrfs_compress_pages( + compress_type | (compress_level << 4), + inode->i_mapping, start, + pages, + &nr_pages, + &total_in, + &total_compressed); + } if (!ret) { unsigned long offset = offset_in_page(total_compressed); struct page *page = pages[nr_pages - 1]; @@ -1479,10 +1494,10 @@ static noinline int run_delalloc_nocow(struct inode *inode, disk_num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); /* - * If extent we got ends before our range starts, skip - * to next extent + * If the extent we got ends before our current offset, + * skip to the next extent. */ - if (extent_end <= start) { + if (extent_end <= cur_offset) { path->slots[0]++; goto next_slot; } @@ -2128,7 +2143,7 @@ static blk_status_t btrfs_submit_bio_hook(struct inode *inode, struct bio *bio, bio_flags); goto out; } else if (!skip_sum) { - ret = btrfs_lookup_bio_sums(inode, bio, NULL); + ret = btrfs_lookup_bio_sums(inode, bio, (u64)-1, NULL); if (ret) goto out; } @@ -2394,649 +2409,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, return ret; } -/* snapshot-aware defrag */ -struct sa_defrag_extent_backref { - struct rb_node node; - struct old_sa_defrag_extent *old; - u64 root_id; - u64 inum; - u64 file_pos; - u64 extent_offset; - u64 num_bytes; - u64 generation; -}; - -struct old_sa_defrag_extent { - struct list_head list; - struct new_sa_defrag_extent *new; - - u64 extent_offset; - u64 bytenr; - u64 offset; - u64 len; - int count; -}; - -struct new_sa_defrag_extent { - struct rb_root root; - struct list_head head; - struct btrfs_path *path; - struct inode *inode; - u64 file_pos; - u64 len; - u64 bytenr; - u64 disk_len; - u8 compress_type; -}; - -static int backref_comp(struct sa_defrag_extent_backref *b1, - struct sa_defrag_extent_backref *b2) -{ - if (b1->root_id < b2->root_id) - return -1; - else if (b1->root_id > b2->root_id) - return 1; - - if (b1->inum < b2->inum) - return -1; - else if (b1->inum > b2->inum) - return 1; - - if (b1->file_pos < b2->file_pos) - return -1; - else if (b1->file_pos > b2->file_pos) - return 1; - - /* - * [------------------------------] ===> (a range of space) - * |<--->| |<---->| =============> (fs/file tree A) - * |<---------------------------->| ===> (fs/file tree B) - * - * A range of space can refer to two file extents in one tree while - * refer to only one file extent in another tree. - * - * So we may process a disk offset more than one time(two extents in A) - * and locate at the same extent(one extent in B), then insert two same - * backrefs(both refer to the extent in B). - */ - return 0; -} - -static void backref_insert(struct rb_root *root, - struct sa_defrag_extent_backref *backref) -{ - struct rb_node **p = &root->rb_node; - struct rb_node *parent = NULL; - struct sa_defrag_extent_backref *entry; - int ret; - - while (*p) { - parent = *p; - entry = rb_entry(parent, struct sa_defrag_extent_backref, node); - - ret = backref_comp(backref, entry); - if (ret < 0) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - - rb_link_node(&backref->node, parent, p); - rb_insert_color(&backref->node, root); -} - -/* - * Note the backref might has changed, and in this case we just return 0. - */ -static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id, - void *ctx) -{ - struct btrfs_file_extent_item *extent; - struct old_sa_defrag_extent *old = ctx; - struct new_sa_defrag_extent *new = old->new; - struct btrfs_path *path = new->path; - struct btrfs_key key; - struct btrfs_root *root; - struct sa_defrag_extent_backref *backref; - struct extent_buffer *leaf; - struct inode *inode = new->inode; - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - int slot; - int ret; - u64 extent_offset; - u64 num_bytes; - - if (BTRFS_I(inode)->root->root_key.objectid == root_id && - inum == btrfs_ino(BTRFS_I(inode))) - return 0; - - key.objectid = root_id; - key.type = BTRFS_ROOT_ITEM_KEY; - key.offset = (u64)-1; - - root = btrfs_read_fs_root_no_name(fs_info, &key); - if (IS_ERR(root)) { - if (PTR_ERR(root) == -ENOENT) - return 0; - WARN_ON(1); - btrfs_debug(fs_info, "inum=%llu, offset=%llu, root_id=%llu", - inum, offset, root_id); - return PTR_ERR(root); - } - - key.objectid = inum; - key.type = BTRFS_EXTENT_DATA_KEY; - if (offset > (u64)-1 << 32) - key.offset = 0; - else - key.offset = offset; - - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (WARN_ON(ret < 0)) - return ret; - ret = 0; - - while (1) { - cond_resched(); - - leaf = path->nodes[0]; - slot = path->slots[0]; - - if (slot >= btrfs_header_nritems(leaf)) { - ret = btrfs_next_leaf(root, path); - if (ret < 0) { - goto out; - } else if (ret > 0) { - ret = 0; - goto out; - } - continue; - } - - path->slots[0]++; - - btrfs_item_key_to_cpu(leaf, &key, slot); - - if (key.objectid > inum) - goto out; - - if (key.objectid < inum || key.type != BTRFS_EXTENT_DATA_KEY) - continue; - - extent = btrfs_item_ptr(leaf, slot, - struct btrfs_file_extent_item); - - if (btrfs_file_extent_disk_bytenr(leaf, extent) != old->bytenr) - continue; - - /* - * 'offset' refers to the exact key.offset, - * NOT the 'offset' field in btrfs_extent_data_ref, ie. - * (key.offset - extent_offset). - */ - if (key.offset != offset) - continue; - - extent_offset = btrfs_file_extent_offset(leaf, extent); - num_bytes = btrfs_file_extent_num_bytes(leaf, extent); - - if (extent_offset >= old->extent_offset + old->offset + - old->len || extent_offset + num_bytes <= - old->extent_offset + old->offset) - continue; - break; - } - - backref = kmalloc(sizeof(*backref), GFP_NOFS); - if (!backref) { - ret = -ENOENT; - goto out; - } - - backref->root_id = root_id; - backref->inum = inum; - backref->file_pos = offset; - backref->num_bytes = num_bytes; - backref->extent_offset = extent_offset; - backref->generation = btrfs_file_extent_generation(leaf, extent); - backref->old = old; - backref_insert(&new->root, backref); - old->count++; -out: - btrfs_release_path(path); - WARN_ON(ret); - return ret; -} - -static noinline bool record_extent_backrefs(struct btrfs_path *path, - struct new_sa_defrag_extent *new) -{ - struct btrfs_fs_info *fs_info = btrfs_sb(new->inode->i_sb); - struct old_sa_defrag_extent *old, *tmp; - int ret; - - new->path = path; - - list_for_each_entry_safe(old, tmp, &new->head, list) { - ret = iterate_inodes_from_logical(old->bytenr + - old->extent_offset, fs_info, - path, record_one_backref, - old, false); - if (ret < 0 && ret != -ENOENT) - return false; - - /* no backref to be processed for this extent */ - if (!old->count) { - list_del(&old->list); - kfree(old); - } - } - - if (list_empty(&new->head)) - return false; - - return true; -} - -static int relink_is_mergable(struct extent_buffer *leaf, - struct btrfs_file_extent_item *fi, - struct new_sa_defrag_extent *new) -{ - if (btrfs_file_extent_disk_bytenr(leaf, fi) != new->bytenr) - return 0; - - if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG) - return 0; - - if (btrfs_file_extent_compression(leaf, fi) != new->compress_type) - return 0; - - if (btrfs_file_extent_encryption(leaf, fi) || - btrfs_file_extent_other_encoding(leaf, fi)) - return 0; - - return 1; -} - -/* - * Note the backref might has changed, and in this case we just return 0. - */ -static noinline int relink_extent_backref(struct btrfs_path *path, - struct sa_defrag_extent_backref *prev, - struct sa_defrag_extent_backref *backref) -{ - struct btrfs_file_extent_item *extent; - struct btrfs_file_extent_item *item; - struct btrfs_ordered_extent *ordered; - struct btrfs_trans_handle *trans; - struct btrfs_ref ref = { 0 }; - struct btrfs_root *root; - struct btrfs_key key; - struct extent_buffer *leaf; - struct old_sa_defrag_extent *old = backref->old; - struct new_sa_defrag_extent *new = old->new; - struct btrfs_fs_info *fs_info = btrfs_sb(new->inode->i_sb); - struct inode *inode; - struct extent_state *cached = NULL; - int ret = 0; - u64 start; - u64 len; - u64 lock_start; - u64 lock_end; - bool merge = false; - int index; - - if (prev && prev->root_id == backref->root_id && - prev->inum == backref->inum && - prev->file_pos + prev->num_bytes == backref->file_pos) - merge = true; - - /* step 1: get root */ - key.objectid = backref->root_id; - key.type = BTRFS_ROOT_ITEM_KEY; - key.offset = (u64)-1; - - index = srcu_read_lock(&fs_info->subvol_srcu); - - root = btrfs_read_fs_root_no_name(fs_info, &key); - if (IS_ERR(root)) { - srcu_read_unlock(&fs_info->subvol_srcu, index); - if (PTR_ERR(root) == -ENOENT) - return 0; - return PTR_ERR(root); - } - - if (btrfs_root_readonly(root)) { - srcu_read_unlock(&fs_info->subvol_srcu, index); - return 0; - } - - /* step 2: get inode */ - key.objectid = backref->inum; - key.type = BTRFS_INODE_ITEM_KEY; - key.offset = 0; - - inode = btrfs_iget(fs_info->sb, &key, root); - if (IS_ERR(inode)) { - srcu_read_unlock(&fs_info->subvol_srcu, index); - return 0; - } - - srcu_read_unlock(&fs_info->subvol_srcu, index); - - /* step 3: relink backref */ - lock_start = backref->file_pos; - lock_end = backref->file_pos + backref->num_bytes - 1; - lock_extent_bits(&BTRFS_I(inode)->io_tree, lock_start, lock_end, - &cached); - - ordered = btrfs_lookup_first_ordered_extent(inode, lock_end); - if (ordered) { - btrfs_put_ordered_extent(ordered); - goto out_unlock; - } - - trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - goto out_unlock; - } - - key.objectid = backref->inum; - key.type = BTRFS_EXTENT_DATA_KEY; - key.offset = backref->file_pos; - - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret < 0) { - goto out_free_path; - } else if (ret > 0) { - ret = 0; - goto out_free_path; - } - - extent = btrfs_item_ptr(path->nodes[0], path->slots[0], - struct btrfs_file_extent_item); - - if (btrfs_file_extent_generation(path->nodes[0], extent) != - backref->generation) - goto out_free_path; - - btrfs_release_path(path); - - start = backref->file_pos; - if (backref->extent_offset < old->extent_offset + old->offset) - start += old->extent_offset + old->offset - - backref->extent_offset; - - len = min(backref->extent_offset + backref->num_bytes, - old->extent_offset + old->offset + old->len); - len -= max(backref->extent_offset, old->extent_offset + old->offset); - - ret = btrfs_drop_extents(trans, root, inode, start, - start + len, 1); - if (ret) - goto out_free_path; -again: - key.objectid = btrfs_ino(BTRFS_I(inode)); - key.type = BTRFS_EXTENT_DATA_KEY; - key.offset = start; - - path->leave_spinning = 1; - if (merge) { - struct btrfs_file_extent_item *fi; - u64 extent_len; - struct btrfs_key found_key; - - ret = btrfs_search_slot(trans, root, &key, path, 0, 1); - if (ret < 0) - goto out_free_path; - - path->slots[0]--; - leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - - fi = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_file_extent_item); - extent_len = btrfs_file_extent_num_bytes(leaf, fi); - - if (extent_len + found_key.offset == start && - relink_is_mergable(leaf, fi, new)) { - btrfs_set_file_extent_num_bytes(leaf, fi, - extent_len + len); - btrfs_mark_buffer_dirty(leaf); - inode_add_bytes(inode, len); - - ret = 1; - goto out_free_path; - } else { - merge = false; - btrfs_release_path(path); - goto again; - } - } - - ret = btrfs_insert_empty_item(trans, root, path, &key, - sizeof(*extent)); - if (ret) { - btrfs_abort_transaction(trans, ret); - goto out_free_path; - } - - leaf = path->nodes[0]; - item = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_file_extent_item); - btrfs_set_file_extent_disk_bytenr(leaf, item, new->bytenr); - btrfs_set_file_extent_disk_num_bytes(leaf, item, new->disk_len); - btrfs_set_file_extent_offset(leaf, item, start - new->file_pos); - btrfs_set_file_extent_num_bytes(leaf, item, len); - btrfs_set_file_extent_ram_bytes(leaf, item, new->len); - btrfs_set_file_extent_generation(leaf, item, trans->transid); - btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG); - btrfs_set_file_extent_compression(leaf, item, new->compress_type); - btrfs_set_file_extent_encryption(leaf, item, 0); - btrfs_set_file_extent_other_encoding(leaf, item, 0); - - btrfs_mark_buffer_dirty(leaf); - inode_add_bytes(inode, len); - btrfs_release_path(path); - - btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new->bytenr, - new->disk_len, 0); - btrfs_init_data_ref(&ref, backref->root_id, backref->inum, - new->file_pos); /* start - extent_offset */ - ret = btrfs_inc_extent_ref(trans, &ref); - if (ret) { - btrfs_abort_transaction(trans, ret); - goto out_free_path; - } - - ret = 1; -out_free_path: - btrfs_release_path(path); - path->leave_spinning = 0; - btrfs_end_transaction(trans); -out_unlock: - unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start, lock_end, - &cached); - iput(inode); - return ret; -} - -static void free_sa_defrag_extent(struct new_sa_defrag_extent *new) -{ - struct old_sa_defrag_extent *old, *tmp; - - if (!new) - return; - - list_for_each_entry_safe(old, tmp, &new->head, list) { - kfree(old); - } - kfree(new); -} - -static void relink_file_extents(struct new_sa_defrag_extent *new) -{ - struct btrfs_fs_info *fs_info = btrfs_sb(new->inode->i_sb); - struct btrfs_path *path; - struct sa_defrag_extent_backref *backref; - struct sa_defrag_extent_backref *prev = NULL; - struct rb_node *node; - int ret; - - path = btrfs_alloc_path(); - if (!path) - return; - - if (!record_extent_backrefs(path, new)) { - btrfs_free_path(path); - goto out; - } - btrfs_release_path(path); - - while (1) { - node = rb_first(&new->root); - if (!node) - break; - rb_erase(node, &new->root); - - backref = rb_entry(node, struct sa_defrag_extent_backref, node); - - ret = relink_extent_backref(path, prev, backref); - WARN_ON(ret < 0); - - kfree(prev); - - if (ret == 1) - prev = backref; - else - prev = NULL; - cond_resched(); - } - kfree(prev); - - btrfs_free_path(path); -out: - free_sa_defrag_extent(new); - - atomic_dec(&fs_info->defrag_running); - wake_up(&fs_info->transaction_wait); -} - -static struct new_sa_defrag_extent * -record_old_file_extents(struct inode *inode, - struct btrfs_ordered_extent *ordered) -{ - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_path *path; - struct btrfs_key key; - struct old_sa_defrag_extent *old; - struct new_sa_defrag_extent *new; - int ret; - - new = kmalloc(sizeof(*new), GFP_NOFS); - if (!new) - return NULL; - - new->inode = inode; - new->file_pos = ordered->file_offset; - new->len = ordered->len; - new->bytenr = ordered->start; - new->disk_len = ordered->disk_len; - new->compress_type = ordered->compress_type; - new->root = RB_ROOT; - INIT_LIST_HEAD(&new->head); - - path = btrfs_alloc_path(); - if (!path) - goto out_kfree; - - key.objectid = btrfs_ino(BTRFS_I(inode)); - key.type = BTRFS_EXTENT_DATA_KEY; - key.offset = new->file_pos; - - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret < 0) - goto out_free_path; - if (ret > 0 && path->slots[0] > 0) - path->slots[0]--; - - /* find out all the old extents for the file range */ - while (1) { - struct btrfs_file_extent_item *extent; - struct extent_buffer *l; - int slot; - u64 num_bytes; - u64 offset; - u64 end; - u64 disk_bytenr; - u64 extent_offset; - - l = path->nodes[0]; - slot = path->slots[0]; - - if (slot >= btrfs_header_nritems(l)) { - ret = btrfs_next_leaf(root, path); - if (ret < 0) - goto out_free_path; - else if (ret > 0) - break; - continue; - } - - btrfs_item_key_to_cpu(l, &key, slot); - - if (key.objectid != btrfs_ino(BTRFS_I(inode))) - break; - if (key.type != BTRFS_EXTENT_DATA_KEY) - break; - if (key.offset >= new->file_pos + new->len) - break; - - extent = btrfs_item_ptr(l, slot, struct btrfs_file_extent_item); - - num_bytes = btrfs_file_extent_num_bytes(l, extent); - if (key.offset + num_bytes < new->file_pos) - goto next; - - disk_bytenr = btrfs_file_extent_disk_bytenr(l, extent); - if (!disk_bytenr) - goto next; - - extent_offset = btrfs_file_extent_offset(l, extent); - - old = kmalloc(sizeof(*old), GFP_NOFS); - if (!old) - goto out_free_path; - - offset = max(new->file_pos, key.offset); - end = min(new->file_pos + new->len, key.offset + num_bytes); - - old->bytenr = disk_bytenr; - old->extent_offset = extent_offset; - old->offset = offset - key.offset; - old->len = end - offset; - old->new = new; - old->count = 0; - list_add_tail(&old->list, &new->head); -next: - path->slots[0]++; - cond_resched(); - } - - btrfs_free_path(path); - atomic_inc(&fs_info->defrag_running); - - return new; - -out_free_path: - btrfs_free_path(path); -out_kfree: - free_sa_defrag_extent(new); - return NULL; -} - static void btrfs_release_delalloc_bytes(struct btrfs_fs_info *fs_info, u64 start, u64 len) { @@ -3064,15 +2436,19 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) struct btrfs_trans_handle *trans = NULL; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct extent_state *cached_state = NULL; - struct new_sa_defrag_extent *new = NULL; + u64 start, end; int compress_type = 0; int ret = 0; - u64 logical_len = ordered_extent->len; + u64 logical_len = ordered_extent->num_bytes; bool freespace_inode; bool truncated = false; bool range_locked = false; bool clear_new_delalloc_bytes = false; bool clear_reserved_extent = true; + unsigned int clear_bits; + + start = ordered_extent->file_offset; + end = start + ordered_extent->num_bytes - 1; if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) && !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags) && @@ -3086,10 +2462,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) goto out; } - btrfs_free_io_failure_record(BTRFS_I(inode), - ordered_extent->file_offset, - ordered_extent->file_offset + - ordered_extent->len - 1); + btrfs_free_io_failure_record(BTRFS_I(inode), start, end); if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) { truncated = true; @@ -3107,8 +2480,8 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) * space for NOCOW range. * As NOCOW won't cause a new delayed ref, just free the space */ - btrfs_qgroup_free_data(inode, NULL, ordered_extent->file_offset, - ordered_extent->len); + btrfs_qgroup_free_data(inode, NULL, start, + ordered_extent->num_bytes); btrfs_ordered_update_i_size(inode, 0, ordered_extent); if (freespace_inode) trans = btrfs_join_transaction_spacecache(root); @@ -3127,23 +2500,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) } range_locked = true; - lock_extent_bits(io_tree, ordered_extent->file_offset, - ordered_extent->file_offset + ordered_extent->len - 1, - &cached_state); - - ret = test_range_bit(io_tree, ordered_extent->file_offset, - ordered_extent->file_offset + ordered_extent->len - 1, - EXTENT_DEFRAG, 0, cached_state); - if (ret) { - u64 last_snapshot = btrfs_root_last_snapshot(&root->root_item); - if (0 && last_snapshot >= BTRFS_I(inode)->generation) - /* the inode is shared */ - new = record_old_file_extents(inode, ordered_extent); - - clear_extent_bit(io_tree, ordered_extent->file_offset, - ordered_extent->file_offset + ordered_extent->len - 1, - EXTENT_DEFRAG, 0, 0, &cached_state); - } + lock_extent_bits(io_tree, start, end, &cached_state); if (freespace_inode) trans = btrfs_join_transaction_spacecache(root); @@ -3161,31 +2518,30 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) compress_type = ordered_extent->compress_type; if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { BUG_ON(compress_type); - btrfs_qgroup_free_data(inode, NULL, ordered_extent->file_offset, - ordered_extent->len); + btrfs_qgroup_free_data(inode, NULL, start, + ordered_extent->num_bytes); ret = btrfs_mark_extent_written(trans, BTRFS_I(inode), ordered_extent->file_offset, ordered_extent->file_offset + logical_len); } else { BUG_ON(root == fs_info->tree_root); - ret = insert_reserved_file_extent(trans, inode, - ordered_extent->file_offset, - ordered_extent->start, - ordered_extent->disk_len, + ret = insert_reserved_file_extent(trans, inode, start, + ordered_extent->disk_bytenr, + ordered_extent->disk_num_bytes, logical_len, logical_len, compress_type, 0, 0, BTRFS_FILE_EXTENT_REG); if (!ret) { clear_reserved_extent = false; btrfs_release_delalloc_bytes(fs_info, - ordered_extent->start, - ordered_extent->disk_len); + ordered_extent->disk_bytenr, + ordered_extent->disk_num_bytes); } } unpin_extent_cache(&BTRFS_I(inode)->extent_tree, - ordered_extent->file_offset, ordered_extent->len, - trans->transid); + ordered_extent->file_offset, + ordered_extent->num_bytes, trans->transid); if (ret < 0) { btrfs_abort_transaction(trans, ret); goto out; @@ -3205,37 +2561,27 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) } ret = 0; out: - if (range_locked || clear_new_delalloc_bytes) { - unsigned int clear_bits = 0; - - if (range_locked) - clear_bits |= EXTENT_LOCKED; - if (clear_new_delalloc_bytes) - clear_bits |= EXTENT_DELALLOC_NEW; - clear_extent_bit(&BTRFS_I(inode)->io_tree, - ordered_extent->file_offset, - ordered_extent->file_offset + - ordered_extent->len - 1, - clear_bits, - (clear_bits & EXTENT_LOCKED) ? 1 : 0, - 0, &cached_state); - } + clear_bits = EXTENT_DEFRAG; + if (range_locked) + clear_bits |= EXTENT_LOCKED; + if (clear_new_delalloc_bytes) + clear_bits |= EXTENT_DELALLOC_NEW; + clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, clear_bits, + (clear_bits & EXTENT_LOCKED) ? 1 : 0, 0, + &cached_state); if (trans) btrfs_end_transaction(trans); if (ret || truncated) { - u64 start, end; + u64 unwritten_start = start; if (truncated) - start = ordered_extent->file_offset + logical_len; - else - start = ordered_extent->file_offset; - end = ordered_extent->file_offset + ordered_extent->len - 1; - clear_extent_uptodate(io_tree, start, end, NULL); + unwritten_start += logical_len; + clear_extent_uptodate(io_tree, unwritten_start, end, NULL); /* Drop the cache for the part of the extent we didn't write. */ - btrfs_drop_extent_cache(BTRFS_I(inode), start, end, 0); + btrfs_drop_extent_cache(BTRFS_I(inode), unwritten_start, end, 0); /* * If the ordered extent had an IOERR or something else went @@ -3250,29 +2596,28 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) if ((ret || !logical_len) && clear_reserved_extent && !test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) && - !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) + !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { + /* + * Discard the range before returning it back to the + * free space pool + */ + if (ret && btrfs_test_opt(fs_info, DISCARD)) + btrfs_discard_extent(fs_info, + ordered_extent->disk_bytenr, + ordered_extent->disk_num_bytes, + NULL); btrfs_free_reserved_extent(fs_info, - ordered_extent->start, - ordered_extent->disk_len, 1); + ordered_extent->disk_bytenr, + ordered_extent->disk_num_bytes, 1); + } } - /* * This needs to be done to make sure anybody waiting knows we are done * updating everything for this ordered extent. */ btrfs_remove_ordered_extent(inode, ordered_extent); - /* for snapshot-aware defrag */ - if (new) { - if (ret) { - free_sa_defrag_extent(new); - atomic_dec(&fs_info->defrag_running); - } else { - relink_file_extents(new); - } - } - /* once for us */ btrfs_put_ordered_extent(ordered_extent); /* once for the tree */ @@ -5157,7 +4502,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) cur_offset = hole_start; while (1) { em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset, - block_end - cur_offset, 0); + block_end - cur_offset); if (IS_ERR(em)) { err = PTR_ERR(em); em = NULL; @@ -5841,7 +5186,11 @@ static struct inode *new_simple_dir(struct super_block *s, set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags); inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID; - inode->i_op = &btrfs_dir_ro_inode_operations; + /* + * We only need lookup, the rest is read-only and there's no inode + * associated with the dentry + */ + inode->i_op = &simple_dir_inode_operations; inode->i_opflags &= ~IOP_XATTR; inode->i_fop = &simple_dir_operations; inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO; @@ -6932,18 +6281,27 @@ static noinline int uncompress_inline(struct btrfs_path *path, return ret; } -/* - * a bit scary, this does extent mapping from logical file offset to the disk. - * the ugly parts come from merging extents from the disk with the in-ram - * representation. This gets more complex because of the data=ordered code, - * where the in-ram extents might be locked pending data=ordered completion. +/** + * btrfs_get_extent - Lookup the first extent overlapping a range in a file. + * @inode: file to search in + * @page: page to read extent data into if the extent is inline + * @pg_offset: offset into @page to copy to + * @start: file offset + * @len: length of range starting at @start + * + * This returns the first &struct extent_map which overlaps with the given + * range, reading it from the B-tree and caching it if necessary. Note that + * there may be more extents which overlap the given range after the returned + * extent_map. * - * This also copies inline extents directly into the page. + * If @page is not NULL and the extent is inline, this also reads the extent + * data directly into the page and marks the extent up to date in the io_tree. + * + * Return: ERR_PTR on error, non-NULL extent_map on success. */ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode, - struct page *page, - size_t pg_offset, u64 start, u64 len, - int create) + struct page *page, size_t pg_offset, + u64 start, u64 len) { struct btrfs_fs_info *fs_info = inode->root->fs_info; int ret; @@ -6960,7 +6318,6 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode, struct extent_map *em = NULL; struct extent_map_tree *em_tree = &inode->extent_tree; struct extent_io_tree *io_tree = &inode->io_tree; - const bool new_inline = !page || create; read_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, start, len); @@ -7083,8 +6440,7 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode, goto insert; } - btrfs_extent_item_to_extent_map(inode, path, item, - new_inline, em); + btrfs_extent_item_to_extent_map(inode, path, item, !page, em); if (extent_type == BTRFS_FILE_EXTENT_REG || extent_type == BTRFS_FILE_EXTENT_PREALLOC) { @@ -7096,7 +6452,7 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode, size_t extent_offset; size_t copy_size; - if (new_inline) + if (!page) goto out; size = btrfs_file_extent_ram_bytes(leaf, item); @@ -7179,7 +6535,7 @@ struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode, u64 delalloc_end; int err = 0; - em = btrfs_get_extent(inode, NULL, 0, start, len, 0); + em = btrfs_get_extent(inode, NULL, 0, start, len); if (IS_ERR(em)) return em; /* @@ -7804,7 +7160,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, goto err; } - em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len); if (IS_ERR(em)) { ret = PTR_ERR(em); goto unlock_err; @@ -8356,8 +7712,8 @@ static inline blk_status_t btrfs_lookup_and_bind_dio_csum(struct inode *inode, * contention. */ if (dip->logical_offset == file_offset) { - ret = btrfs_lookup_bio_sums_dio(inode, dip->orig_bio, - file_offset); + ret = btrfs_lookup_bio_sums(inode, dip->orig_bio, file_offset, + NULL); if (ret) return ret; } @@ -8870,7 +8226,8 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset, ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), start, page_end - start + 1); if (ordered) { - end = min(page_end, ordered->file_offset + ordered->len - 1); + end = min(page_end, + ordered->file_offset + ordered->num_bytes - 1); /* * IO on this page will never be started, so we need * to account for any ordered extents now @@ -9071,7 +8428,6 @@ vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf) ret = VM_FAULT_SIGBUS; goto out_unlock; } - ret2 = 0; /* page is wholly or partially inside EOF */ if (page_start + PAGE_SIZE > size) @@ -9095,12 +8451,10 @@ vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf) unlock_extent_cached(io_tree, page_start, page_end, &cached_state); - if (!ret2) { - btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE); - sb_end_pagefault(inode->i_sb); - extent_changeset_free(data_reserved); - return VM_FAULT_LOCKED; - } + btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE); + sb_end_pagefault(inode->i_sb); + extent_changeset_free(data_reserved); + return VM_FAULT_LOCKED; out_unlock: unlock_page(page); @@ -9398,7 +8752,7 @@ void btrfs_destroy_inode(struct inode *inode) else { btrfs_err(fs_info, "found ordered extent %llu %llu on inode cleanup", - ordered->file_offset, ordered->len); + ordered->file_offset, ordered->num_bytes); btrfs_remove_ordered_extent(inode, ordered); btrfs_put_ordered_extent(ordered); btrfs_put_ordered_extent(ordered); @@ -10831,7 +10185,7 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file, struct btrfs_block_group *bg; u64 len = isize - start; - em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len); if (IS_ERR(em)) { ret = PTR_ERR(em); goto out; @@ -10999,11 +10353,6 @@ static const struct inode_operations btrfs_dir_inode_operations = { .update_time = btrfs_update_time, .tmpfile = btrfs_tmpfile, }; -static const struct inode_operations btrfs_dir_ro_inode_operations = { - .lookup = btrfs_lookup, - .permission = btrfs_permission, - .update_time = btrfs_update_time, -}; static const struct file_operations btrfs_dir_file_operations = { .llseek = generic_file_llseek, diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 18e328ce4b5496..f8ff1563e9db13 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1128,7 +1128,7 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start) /* get the big lock and read metadata off disk */ lock_extent_bits(io_tree, start, end, &cached); - em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len); unlock_extent_cached(io_tree, start, end, &cached); if (IS_ERR(em)) @@ -1414,7 +1414,11 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, return -EINVAL; if (do_compress) { - if (range->compress_type >= BTRFS_NR_COMPRESS_TYPES) + /* + * The bottom 4 bits of compress_type are for used for the + * compression type, the other bits for the compression level + */ + if ((range->compress_type & 0xF) >= BTRFS_NR_COMPRESS_TYPES) return -EINVAL; if (range->compress_type) compress_type = range->compress_type; @@ -1572,9 +1576,9 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, filemap_flush(inode->i_mapping); } - if (range->compress_type == BTRFS_COMPRESS_LZO) { + if ((range->compress_type & 0xF) == BTRFS_COMPRESS_LZO) { btrfs_set_fs_incompat(fs_info, COMPRESS_LZO); - } else if (range->compress_type == BTRFS_COMPRESS_ZSTD) { + } else if ((range->compress_type & 0xF) == BTRFS_COMPRESS_ZSTD) { btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD); } diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index fb09bc2f8e4d5a..ecb9fb6a6fe07f 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -20,9 +20,9 @@ static struct kmem_cache *btrfs_ordered_extent_cache; static u64 entry_end(struct btrfs_ordered_extent *entry) { - if (entry->file_offset + entry->len < entry->file_offset) + if (entry->file_offset + entry->num_bytes < entry->file_offset) return (u64)-1; - return entry->file_offset + entry->len; + return entry->file_offset + entry->num_bytes; } /* returns NULL if the insertion worked, or it returns the node it did find @@ -52,14 +52,6 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset, return NULL; } -static void ordered_data_tree_panic(struct inode *inode, int errno, - u64 offset) -{ - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - btrfs_panic(fs_info, errno, - "Inconsistency in ordered tree at offset %llu", offset); -} - /* * look for a given offset in the tree, and if it can't be found return the * first lesser offset @@ -120,7 +112,7 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset, static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset) { if (file_offset < entry->file_offset || - entry->file_offset + entry->len <= file_offset) + entry->file_offset + entry->num_bytes <= file_offset) return 0; return 1; } @@ -129,7 +121,7 @@ static int range_overlaps(struct btrfs_ordered_extent *entry, u64 file_offset, u64 len) { if (file_offset + len <= entry->file_offset || - entry->file_offset + entry->len <= file_offset) + entry->file_offset + entry->num_bytes <= file_offset) return 0; return 1; } @@ -161,19 +153,14 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, } /* allocate and add a new ordered_extent into the per-inode tree. - * file_offset is the logical offset in the file - * - * start is the disk block number of an extent already reserved in the - * extent allocation tree - * - * len is the length of the extent * * The tree is given a single reference on the ordered extent that was * inserted. */ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, - u64 start, u64 len, u64 disk_len, - int type, int dio, int compress_type) + u64 disk_bytenr, u64 num_bytes, + u64 disk_num_bytes, int type, int dio, + int compress_type) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_root *root = BTRFS_I(inode)->root; @@ -187,10 +174,10 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, return -ENOMEM; entry->file_offset = file_offset; - entry->start = start; - entry->len = len; - entry->disk_len = disk_len; - entry->bytes_left = len; + entry->disk_bytenr = disk_bytenr; + entry->num_bytes = num_bytes; + entry->disk_num_bytes = disk_num_bytes; + entry->bytes_left = num_bytes; entry->inode = igrab(inode); entry->compress_type = compress_type; entry->truncated_len = (u64)-1; @@ -198,7 +185,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, set_bit(type, &entry->flags); if (dio) { - percpu_counter_add_batch(&fs_info->dio_bytes, len, + percpu_counter_add_batch(&fs_info->dio_bytes, num_bytes, fs_info->delalloc_batch); set_bit(BTRFS_ORDERED_DIRECT, &entry->flags); } @@ -219,7 +206,9 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, node = tree_insert(&tree->tree, file_offset, &entry->rb_node); if (node) - ordered_data_tree_panic(inode, -EEXIST, file_offset); + btrfs_panic(fs_info, -EEXIST, + "inconsistency in ordered tree at offset %llu", + file_offset); spin_unlock_irq(&tree->lock); spin_lock(&root->ordered_extent_lock); @@ -247,27 +236,30 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, } int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, - u64 start, u64 len, u64 disk_len, int type) + u64 disk_bytenr, u64 num_bytes, u64 disk_num_bytes, + int type) { - return __btrfs_add_ordered_extent(inode, file_offset, start, len, - disk_len, type, 0, + return __btrfs_add_ordered_extent(inode, file_offset, disk_bytenr, + num_bytes, disk_num_bytes, type, 0, BTRFS_COMPRESS_NONE); } int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, - u64 start, u64 len, u64 disk_len, int type) + u64 disk_bytenr, u64 num_bytes, + u64 disk_num_bytes, int type) { - return __btrfs_add_ordered_extent(inode, file_offset, start, len, - disk_len, type, 1, + return __btrfs_add_ordered_extent(inode, file_offset, disk_bytenr, + num_bytes, disk_num_bytes, type, 1, BTRFS_COMPRESS_NONE); } int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset, - u64 start, u64 len, u64 disk_len, - int type, int compress_type) + u64 disk_bytenr, u64 num_bytes, + u64 disk_num_bytes, int type, + int compress_type) { - return __btrfs_add_ordered_extent(inode, file_offset, start, len, - disk_len, type, 0, + return __btrfs_add_ordered_extent(inode, file_offset, disk_bytenr, + num_bytes, disk_num_bytes, type, 0, compress_type); } @@ -328,8 +320,8 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode, } dec_start = max(*file_offset, entry->file_offset); - dec_end = min(*file_offset + io_size, entry->file_offset + - entry->len); + dec_end = min(*file_offset + io_size, + entry->file_offset + entry->num_bytes); *file_offset = dec_end; if (dec_start > dec_end) { btrfs_crit(fs_info, "bad ordering dec_start %llu end %llu", @@ -471,10 +463,11 @@ void btrfs_remove_ordered_extent(struct inode *inode, btrfs_mod_outstanding_extents(btrfs_inode, -1); spin_unlock(&btrfs_inode->lock); if (root != fs_info->tree_root) - btrfs_delalloc_release_metadata(btrfs_inode, entry->len, false); + btrfs_delalloc_release_metadata(btrfs_inode, entry->num_bytes, + false); if (test_bit(BTRFS_ORDERED_DIRECT, &entry->flags)) - percpu_counter_add_batch(&fs_info->dio_bytes, -entry->len, + percpu_counter_add_batch(&fs_info->dio_bytes, -entry->num_bytes, fs_info->delalloc_batch); tree = &btrfs_inode->ordered_tree; @@ -534,8 +527,8 @@ u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr, ordered = list_first_entry(&splice, struct btrfs_ordered_extent, root_extent_list); - if (range_end <= ordered->start || - ordered->start + ordered->disk_len <= range_start) { + if (range_end <= ordered->disk_bytenr || + ordered->disk_bytenr + ordered->disk_num_bytes <= range_start) { list_move_tail(&ordered->root_extent_list, &skipped); cond_resched_lock(&root->ordered_extent_lock); continue; @@ -619,7 +612,7 @@ void btrfs_start_ordered_extent(struct inode *inode, int wait) { u64 start = entry->file_offset; - u64 end = start + entry->len - 1; + u64 end = start + entry->num_bytes - 1; trace_btrfs_ordered_extent_start(inode, entry); @@ -680,7 +673,7 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) btrfs_put_ordered_extent(ordered); break; } - if (ordered->file_offset + ordered->len <= start) { + if (ordered->file_offset + ordered->num_bytes <= start) { btrfs_put_ordered_extent(ordered); break; } diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 4eb0319a86d7a8..3beb4da4ab419a 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -67,14 +67,13 @@ struct btrfs_ordered_extent { /* logical offset in the file */ u64 file_offset; - /* disk byte number */ - u64 start; - - /* ram length of the extent in bytes */ - u64 len; - - /* extent length on disk */ - u64 disk_len; + /* + * These fields directly correspond to the same fields in + * btrfs_file_extent_item. + */ + u64 disk_bytenr; + u64 num_bytes; + u64 disk_num_bytes; /* number of bytes that still need writing */ u64 bytes_left; @@ -161,12 +160,15 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode, u64 *file_offset, u64 io_size, int uptodate); int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, - u64 start, u64 len, u64 disk_len, int type); + u64 disk_bytenr, u64 num_bytes, u64 disk_num_bytes, + int type); int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, - u64 start, u64 len, u64 disk_len, int type); + u64 disk_bytenr, u64 num_bytes, + u64 disk_num_bytes, int type); int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset, - u64 start, u64 len, u64 disk_len, - int type, int compress_type); + u64 disk_bytenr, u64 num_bytes, + u64 disk_num_bytes, int type, + int compress_type); void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry, struct btrfs_ordered_sum *sum); struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode, diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 873b6b694107a1..61f44e78e3c9e7 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -317,7 +317,7 @@ void btrfs_print_leaf(struct extent_buffer *l) print_uuid_item(l, btrfs_item_ptr_offset(l, i), btrfs_item_size_nr(l, i)); break; - }; + } } } diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index d4282e12f2a64a..b046b04d7cce48 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1243,7 +1243,6 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src, u64 dst) { struct btrfs_fs_info *fs_info = trans->fs_info; - struct btrfs_root *quota_root; struct btrfs_qgroup *parent; struct btrfs_qgroup *member; struct btrfs_qgroup_list *list; @@ -1259,9 +1258,8 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src, return -ENOMEM; mutex_lock(&fs_info->qgroup_ioctl_lock); - quota_root = fs_info->quota_root; - if (!quota_root) { - ret = -EINVAL; + if (!fs_info->quota_root) { + ret = -ENOTCONN; goto out; } member = find_qgroup_rb(fs_info, src); @@ -1307,7 +1305,6 @@ static int __del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src, u64 dst) { struct btrfs_fs_info *fs_info = trans->fs_info; - struct btrfs_root *quota_root; struct btrfs_qgroup *parent; struct btrfs_qgroup *member; struct btrfs_qgroup_list *list; @@ -1320,9 +1317,8 @@ static int __del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src, if (!tmp) return -ENOMEM; - quota_root = fs_info->quota_root; - if (!quota_root) { - ret = -EINVAL; + if (!fs_info->quota_root) { + ret = -ENOTCONN; goto out; } @@ -1387,11 +1383,11 @@ int btrfs_create_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid) int ret = 0; mutex_lock(&fs_info->qgroup_ioctl_lock); - quota_root = fs_info->quota_root; - if (!quota_root) { - ret = -EINVAL; + if (!fs_info->quota_root) { + ret = -ENOTCONN; goto out; } + quota_root = fs_info->quota_root; qgroup = find_qgroup_rb(fs_info, qgroupid); if (qgroup) { ret = -EEXIST; @@ -1416,15 +1412,13 @@ int btrfs_create_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid) int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid) { struct btrfs_fs_info *fs_info = trans->fs_info; - struct btrfs_root *quota_root; struct btrfs_qgroup *qgroup; struct btrfs_qgroup_list *list; int ret = 0; mutex_lock(&fs_info->qgroup_ioctl_lock); - quota_root = fs_info->quota_root; - if (!quota_root) { - ret = -EINVAL; + if (!fs_info->quota_root) { + ret = -ENOTCONN; goto out; } @@ -1465,7 +1459,6 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid, struct btrfs_qgroup_limit *limit) { struct btrfs_fs_info *fs_info = trans->fs_info; - struct btrfs_root *quota_root; struct btrfs_qgroup *qgroup; int ret = 0; /* Sometimes we would want to clear the limit on this qgroup. @@ -1475,9 +1468,8 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid, const u64 CLEAR_VALUE = -1; mutex_lock(&fs_info->qgroup_ioctl_lock); - quota_root = fs_info->quota_root; - if (!quota_root) { - ret = -EINVAL; + if (!fs_info->quota_root) { + ret = -ENOTCONN; goto out; } @@ -2578,10 +2570,9 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans) int btrfs_run_qgroups(struct btrfs_trans_handle *trans) { struct btrfs_fs_info *fs_info = trans->fs_info; - struct btrfs_root *quota_root = fs_info->quota_root; int ret = 0; - if (!quota_root) + if (!fs_info->quota_root) return ret; spin_lock(&fs_info->qgroup_lock); @@ -2875,7 +2866,6 @@ static bool qgroup_check_limits(struct btrfs_fs_info *fs_info, static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce, enum btrfs_qgroup_rsv_type type) { - struct btrfs_root *quota_root; struct btrfs_qgroup *qgroup; struct btrfs_fs_info *fs_info = root->fs_info; u64 ref_root = root->root_key.objectid; @@ -2894,8 +2884,7 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce, enforce = false; spin_lock(&fs_info->qgroup_lock); - quota_root = fs_info->quota_root; - if (!quota_root) + if (!fs_info->quota_root) goto out; qgroup = find_qgroup_rb(fs_info, ref_root); @@ -2962,7 +2951,6 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info, u64 ref_root, u64 num_bytes, enum btrfs_qgroup_rsv_type type) { - struct btrfs_root *quota_root; struct btrfs_qgroup *qgroup; struct ulist_node *unode; struct ulist_iterator uiter; @@ -2980,8 +2968,7 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info, } spin_lock(&fs_info->qgroup_lock); - quota_root = fs_info->quota_root; - if (!quota_root) + if (!fs_info->quota_root) goto out; qgroup = find_qgroup_rb(fs_info, ref_root); @@ -3681,7 +3668,6 @@ void __btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes, static void qgroup_convert_meta(struct btrfs_fs_info *fs_info, u64 ref_root, int num_bytes) { - struct btrfs_root *quota_root = fs_info->quota_root; struct btrfs_qgroup *qgroup; struct ulist_node *unode; struct ulist_iterator uiter; @@ -3689,7 +3675,7 @@ static void qgroup_convert_meta(struct btrfs_fs_info *fs_info, u64 ref_root, if (num_bytes == 0) return; - if (!quota_root) + if (!fs_info->quota_root) return; spin_lock(&fs_info->qgroup_lock); diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index c58245797f30af..af4dd49a71c78e 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -4291,6 +4291,15 @@ static void describe_relocation(struct btrfs_fs_info *fs_info, block_group->start, buf); } +static const char *stage_to_string(int stage) +{ + if (stage == MOVE_DATA_EXTENTS) + return "move data extents"; + if (stage == UPDATE_DATA_PTRS) + return "update data pointers"; + return "unknown"; +} + /* * function to relocate all extents in a block group. */ @@ -4365,12 +4374,15 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start) rc->block_group->length); while (1) { + int finishes_stage; + mutex_lock(&fs_info->cleaner_mutex); ret = relocate_block_group(rc); mutex_unlock(&fs_info->cleaner_mutex); if (ret < 0) err = ret; + finishes_stage = rc->stage; /* * We may have gotten ENOSPC after we already dirtied some * extents. If writeout happens while we're relocating a @@ -4396,8 +4408,8 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start) if (rc->extents_found == 0) break; - btrfs_info(fs_info, "found %llu extents", rc->extents_found); - + btrfs_info(fs_info, "found %llu extents, stage: %s", + rc->extents_found, stage_to_string(finishes_stage)); } WARN_ON(rc->block_group->pinned > 0); @@ -4615,7 +4627,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) LIST_HEAD(list); ordered = btrfs_lookup_ordered_extent(inode, file_pos); - BUG_ON(ordered->file_offset != file_pos || ordered->len != len); + BUG_ON(ordered->file_offset != file_pos || ordered->num_bytes != len); disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt; ret = btrfs_lookup_csums_range(fs_info->csum_root, disk_bytenr, @@ -4639,7 +4651,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) * disk_len vs real len like with real inodes since it's all * disk length. */ - new_bytenr = ordered->start + (sums->bytenr - disk_bytenr); + new_bytenr = ordered->disk_bytenr + sums->bytenr - disk_bytenr; sums->bytenr = new_bytenr; btrfs_add_ordered_sum(ordered, sums); diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index f09aa6ee911322..537bc310a673f1 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -161,8 +161,7 @@ static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global) static int can_overcommit(struct btrfs_fs_info *fs_info, struct btrfs_space_info *space_info, u64 bytes, - enum btrfs_reserve_flush_enum flush, - bool system_chunk) + enum btrfs_reserve_flush_enum flush) { u64 profile; u64 avail; @@ -173,7 +172,7 @@ static int can_overcommit(struct btrfs_fs_info *fs_info, if (space_info->flags & BTRFS_BLOCK_GROUP_DATA) return 0; - if (system_chunk) + if (space_info->flags & BTRFS_BLOCK_GROUP_SYSTEM) profile = btrfs_system_alloc_profile(fs_info); else profile = btrfs_metadata_alloc_profile(fs_info); @@ -227,8 +226,7 @@ void btrfs_try_granting_tickets(struct btrfs_fs_info *fs_info, /* Check and see if our ticket can be satisified now. */ if ((used + ticket->bytes <= space_info->total_bytes) || - can_overcommit(fs_info, space_info, ticket->bytes, flush, - false)) { + can_overcommit(fs_info, space_info, ticket->bytes, flush)) { btrfs_space_info_update_bytes_may_use(fs_info, space_info, ticket->bytes); @@ -626,8 +624,7 @@ static void flush_space(struct btrfs_fs_info *fs_info, static inline u64 btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info, - struct btrfs_space_info *space_info, - bool system_chunk) + struct btrfs_space_info *space_info) { struct reserve_ticket *ticket; u64 used; @@ -643,13 +640,12 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info, to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M); if (can_overcommit(fs_info, space_info, to_reclaim, - BTRFS_RESERVE_FLUSH_ALL, system_chunk)) + BTRFS_RESERVE_FLUSH_ALL)) return 0; used = btrfs_space_info_used(space_info, true); - if (can_overcommit(fs_info, space_info, SZ_1M, - BTRFS_RESERVE_FLUSH_ALL, system_chunk)) + if (can_overcommit(fs_info, space_info, SZ_1M, BTRFS_RESERVE_FLUSH_ALL)) expected = div_factor_fine(space_info->total_bytes, 95); else expected = div_factor_fine(space_info->total_bytes, 90); @@ -665,7 +661,7 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info, static inline int need_do_async_reclaim(struct btrfs_fs_info *fs_info, struct btrfs_space_info *space_info, - u64 used, bool system_chunk) + u64 used) { u64 thresh = div_factor_fine(space_info->total_bytes, 98); @@ -673,8 +669,7 @@ static inline int need_do_async_reclaim(struct btrfs_fs_info *fs_info, if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh) return 0; - if (!btrfs_calc_reclaim_metadata_size(fs_info, space_info, - system_chunk)) + if (!btrfs_calc_reclaim_metadata_size(fs_info, space_info)) return 0; return (used >= thresh && !btrfs_fs_closing(fs_info) && @@ -765,8 +760,7 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work) space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); spin_lock(&space_info->lock); - to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info, - false); + to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info); if (!to_reclaim) { space_info->flush = 0; spin_unlock(&space_info->lock); @@ -785,8 +779,7 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work) return; } to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, - space_info, - false); + space_info); if (last_tickets_id == space_info->tickets_id) { flush_state++; } else { @@ -858,8 +851,7 @@ static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info, int flush_state; spin_lock(&space_info->lock); - to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info, - false); + to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info); if (!to_reclaim) { spin_unlock(&space_info->lock); return; @@ -990,8 +982,7 @@ static int handle_reserve_ticket(struct btrfs_fs_info *fs_info, static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info, struct btrfs_space_info *space_info, u64 orig_bytes, - enum btrfs_reserve_flush_enum flush, - bool system_chunk) + enum btrfs_reserve_flush_enum flush) { struct reserve_ticket ticket; u64 used; @@ -1013,8 +1004,7 @@ static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info, */ if (!pending_tickets && ((used + orig_bytes <= space_info->total_bytes) || - can_overcommit(fs_info, space_info, orig_bytes, flush, - system_chunk))) { + can_overcommit(fs_info, space_info, orig_bytes, flush))) { btrfs_space_info_update_bytes_may_use(fs_info, space_info, orig_bytes); ret = 0; @@ -1054,8 +1044,7 @@ static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info, * the async reclaim as we will panic. */ if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags) && - need_do_async_reclaim(fs_info, space_info, - used, system_chunk) && + need_do_async_reclaim(fs_info, space_info, used) && !work_busy(&fs_info->async_reclaim_work)) { trace_btrfs_trigger_flush(fs_info, space_info->flags, orig_bytes, flush, "preempt"); @@ -1092,10 +1081,9 @@ int btrfs_reserve_metadata_bytes(struct btrfs_root *root, struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; int ret; - bool system_chunk = (root == fs_info->chunk_root); ret = __reserve_metadata_bytes(fs_info, block_rsv->space_info, - orig_bytes, flush, system_chunk); + orig_bytes, flush); if (ret == -ENOSPC && unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) { if (block_rsv != global_rsv && diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 5ebbe8a5ee76d6..16379f491ca1db 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -734,10 +734,10 @@ static int addrm_unknown_feature_attrs(struct btrfs_fs_info *fs_info, bool add) static void __btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs) { - if (fs_devs->device_dir_kobj) { - kobject_del(fs_devs->device_dir_kobj); - kobject_put(fs_devs->device_dir_kobj); - fs_devs->device_dir_kobj = NULL; + if (fs_devs->devices_kobj) { + kobject_del(fs_devs->devices_kobj); + kobject_put(fs_devs->devices_kobj); + fs_devs->devices_kobj = NULL; } if (fs_devs->fsid_kobj.state_initialized) { @@ -969,15 +969,14 @@ int btrfs_sysfs_rm_device_link(struct btrfs_fs_devices *fs_devices, struct hd_struct *disk; struct kobject *disk_kobj; - if (!fs_devices->device_dir_kobj) + if (!fs_devices->devices_kobj) return -EINVAL; if (one_device && one_device->bdev) { disk = one_device->bdev->bd_part; disk_kobj = &part_to_dev(disk)->kobj; - sysfs_remove_link(fs_devices->device_dir_kobj, - disk_kobj->name); + sysfs_remove_link(fs_devices->devices_kobj, disk_kobj->name); } if (one_device) @@ -990,25 +989,12 @@ int btrfs_sysfs_rm_device_link(struct btrfs_fs_devices *fs_devices, disk = one_device->bdev->bd_part; disk_kobj = &part_to_dev(disk)->kobj; - sysfs_remove_link(fs_devices->device_dir_kobj, - disk_kobj->name); + sysfs_remove_link(fs_devices->devices_kobj, disk_kobj->name); } return 0; } -int btrfs_sysfs_add_device(struct btrfs_fs_devices *fs_devs) -{ - if (!fs_devs->device_dir_kobj) - fs_devs->device_dir_kobj = kobject_create_and_add("devices", - &fs_devs->fsid_kobj); - - if (!fs_devs->device_dir_kobj) - return -ENOMEM; - - return 0; -} - int btrfs_sysfs_add_device_link(struct btrfs_fs_devices *fs_devices, struct btrfs_device *one_device) { @@ -1028,7 +1014,7 @@ int btrfs_sysfs_add_device_link(struct btrfs_fs_devices *fs_devices, disk = dev->bdev->bd_part; disk_kobj = &part_to_dev(disk)->kobj; - error = sysfs_create_link(fs_devices->device_dir_kobj, + error = sysfs_create_link(fs_devices->devices_kobj, disk_kobj, disk_kobj->name); if (error) break; @@ -1067,23 +1053,33 @@ void btrfs_sysfs_update_sprout_fsid(struct btrfs_fs_devices *fs_devices, static struct kset *btrfs_kset; /* + * Creates: + * /sys/fs/btrfs/UUID + * * Can be called by the device discovery thread. - * And parent can be specified for seed device */ -int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs, - struct kobject *parent) +int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs) { int error; init_completion(&fs_devs->kobj_unregister); fs_devs->fsid_kobj.kset = btrfs_kset; - error = kobject_init_and_add(&fs_devs->fsid_kobj, - &btrfs_ktype, parent, "%pU", fs_devs->fsid); + error = kobject_init_and_add(&fs_devs->fsid_kobj, &btrfs_ktype, NULL, + "%pU", fs_devs->fsid); if (error) { kobject_put(&fs_devs->fsid_kobj); return error; } + fs_devs->devices_kobj = kobject_create_and_add("devices", + &fs_devs->fsid_kobj); + if (!fs_devs->devices_kobj) { + btrfs_err(fs_devs->fs_info, + "failed to init sysfs device interface"); + kobject_put(&fs_devs->fsid_kobj); + return -ENOMEM; + } + return 0; } diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h index e10c3adfc30fc6..3d27b39eaf9459 100644 --- a/fs/btrfs/sysfs.h +++ b/fs/btrfs/sysfs.h @@ -18,9 +18,7 @@ int btrfs_sysfs_add_device_link(struct btrfs_fs_devices *fs_devices, struct btrfs_device *one_device); int btrfs_sysfs_rm_device_link(struct btrfs_fs_devices *fs_devices, struct btrfs_device *one_device); -int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs, - struct kobject *parent); -int btrfs_sysfs_add_device(struct btrfs_fs_devices *fs_devs); +int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs); void btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs); void btrfs_sysfs_update_sprout_fsid(struct btrfs_fs_devices *fs_devices, const u8 *fsid); diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c index 09ecf7dc7b0829..24a8c714f56cf2 100644 --- a/fs/btrfs/tests/inode-tests.c +++ b/fs/btrfs/tests/inode-tests.c @@ -263,7 +263,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) /* First with no extents */ BTRFS_I(inode)->root = root; - em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, 0, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, 0, sectorsize); if (IS_ERR(em)) { em = NULL; test_err("got an error when we shouldn't have"); @@ -283,7 +283,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) */ setup_file_extents(root, sectorsize); - em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, 0, (u64)-1, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, 0, (u64)-1); if (IS_ERR(em)) { test_err("got an error when we shouldn't have"); goto out; @@ -305,7 +305,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize); if (IS_ERR(em)) { test_err("got an error when we shouldn't have"); goto out; @@ -333,7 +333,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize); if (IS_ERR(em)) { test_err("got an error when we shouldn't have"); goto out; @@ -356,7 +356,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) free_extent_map(em); /* Regular extent */ - em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize); if (IS_ERR(em)) { test_err("got an error when we shouldn't have"); goto out; @@ -384,7 +384,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) free_extent_map(em); /* The next 3 are split extents */ - em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize); if (IS_ERR(em)) { test_err("got an error when we shouldn't have"); goto out; @@ -413,7 +413,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize); if (IS_ERR(em)) { test_err("got an error when we shouldn't have"); goto out; @@ -435,7 +435,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize); if (IS_ERR(em)) { test_err("got an error when we shouldn't have"); goto out; @@ -469,7 +469,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) free_extent_map(em); /* Prealloc extent */ - em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize); if (IS_ERR(em)) { test_err("got an error when we shouldn't have"); goto out; @@ -498,7 +498,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) free_extent_map(em); /* The next 3 are a half written prealloc extent */ - em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize); if (IS_ERR(em)) { test_err("got an error when we shouldn't have"); goto out; @@ -528,7 +528,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize); if (IS_ERR(em)) { test_err("got an error when we shouldn't have"); goto out; @@ -561,7 +561,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize); if (IS_ERR(em)) { test_err("got an error when we shouldn't have"); goto out; @@ -596,7 +596,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) free_extent_map(em); /* Now for the compressed extent */ - em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize); if (IS_ERR(em)) { test_err("got an error when we shouldn't have"); goto out; @@ -630,7 +630,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) free_extent_map(em); /* Split compressed extent */ - em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize); if (IS_ERR(em)) { test_err("got an error when we shouldn't have"); goto out; @@ -665,7 +665,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize); if (IS_ERR(em)) { test_err("got an error when we shouldn't have"); goto out; @@ -692,7 +692,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize); if (IS_ERR(em)) { test_err("got an error when we shouldn't have"); goto out; @@ -727,8 +727,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) free_extent_map(em); /* A hole between regular extents but no hole extent */ - em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset + 6, - sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset + 6, sectorsize); if (IS_ERR(em)) { test_err("got an error when we shouldn't have"); goto out; @@ -755,7 +754,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, SZ_4M, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, SZ_4M); if (IS_ERR(em)) { test_err("got an error when we shouldn't have"); goto out; @@ -788,7 +787,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize); if (IS_ERR(em)) { test_err("got an error when we shouldn't have"); goto out; @@ -872,7 +871,7 @@ static int test_hole_first(u32 sectorsize, u32 nodesize) insert_inode_item_key(root); insert_extent(root, sectorsize, sectorsize, sectorsize, 0, sectorsize, sectorsize, BTRFS_FILE_EXTENT_REG, 0, 1); - em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, 0, 2 * sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, 0, 2 * sectorsize); if (IS_ERR(em)) { test_err("got an error when we shouldn't have"); goto out; @@ -894,8 +893,7 @@ static int test_hole_first(u32 sectorsize, u32 nodesize) } free_extent_map(em); - em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, sectorsize, - 2 * sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, sectorsize, 2 * sectorsize); if (IS_ERR(em)) { test_err("got an error when we shouldn't have"); goto out; diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 97f3520b8d98d2..a92f8a6dd19291 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -373,6 +373,104 @@ static int check_csum_item(struct extent_buffer *leaf, struct btrfs_key *key, return 0; } +/* Inode item error output has the same format as dir_item_err() */ +#define inode_item_err(eb, slot, fmt, ...) \ + dir_item_err(eb, slot, fmt, __VA_ARGS__) + +static int check_inode_key(struct extent_buffer *leaf, struct btrfs_key *key, + int slot) +{ + struct btrfs_key item_key; + bool is_inode_item; + + btrfs_item_key_to_cpu(leaf, &item_key, slot); + is_inode_item = (item_key.type == BTRFS_INODE_ITEM_KEY); + + /* For XATTR_ITEM, location key should be all 0 */ + if (item_key.type == BTRFS_XATTR_ITEM_KEY) { + if (key->type != 0 || key->objectid != 0 || key->offset != 0) + return -EUCLEAN; + return 0; + } + + if ((key->objectid < BTRFS_FIRST_FREE_OBJECTID || + key->objectid > BTRFS_LAST_FREE_OBJECTID) && + key->objectid != BTRFS_ROOT_TREE_DIR_OBJECTID && + key->objectid != BTRFS_FREE_INO_OBJECTID) { + if (is_inode_item) { + generic_err(leaf, slot, + "invalid key objectid: has %llu expect %llu or [%llu, %llu] or %llu", + key->objectid, BTRFS_ROOT_TREE_DIR_OBJECTID, + BTRFS_FIRST_FREE_OBJECTID, + BTRFS_LAST_FREE_OBJECTID, + BTRFS_FREE_INO_OBJECTID); + } else { + dir_item_err(leaf, slot, +"invalid location key objectid: has %llu expect %llu or [%llu, %llu] or %llu", + key->objectid, BTRFS_ROOT_TREE_DIR_OBJECTID, + BTRFS_FIRST_FREE_OBJECTID, + BTRFS_LAST_FREE_OBJECTID, + BTRFS_FREE_INO_OBJECTID); + } + return -EUCLEAN; + } + if (key->offset != 0) { + if (is_inode_item) + inode_item_err(leaf, slot, + "invalid key offset: has %llu expect 0", + key->offset); + else + dir_item_err(leaf, slot, + "invalid location key offset:has %llu expect 0", + key->offset); + return -EUCLEAN; + } + return 0; +} + +static int check_root_key(struct extent_buffer *leaf, struct btrfs_key *key, + int slot) +{ + struct btrfs_key item_key; + bool is_root_item; + + btrfs_item_key_to_cpu(leaf, &item_key, slot); + is_root_item = (item_key.type == BTRFS_ROOT_ITEM_KEY); + + /* No such tree id */ + if (key->objectid == 0) { + if (is_root_item) + generic_err(leaf, slot, "invalid root id 0"); + else + dir_item_err(leaf, slot, + "invalid location key root id 0"); + return -EUCLEAN; + } + + /* DIR_ITEM/INDEX/INODE_REF is not allowed to point to non-fs trees */ + if (!is_fstree(key->objectid) && !is_root_item) { + dir_item_err(leaf, slot, + "invalid location key objectid, have %llu expect [%llu, %llu]", + key->objectid, BTRFS_FIRST_FREE_OBJECTID, + BTRFS_LAST_FREE_OBJECTID); + return -EUCLEAN; + } + + /* + * ROOT_ITEM with non-zero offset means this is a snapshot, created at + * @offset transid. + * Furthermore, for location key in DIR_ITEM, its offset is always -1. + * + * So here we only check offset for reloc tree whose key->offset must + * be a valid tree. + */ + if (key->objectid == BTRFS_TREE_RELOC_OBJECTID && key->offset == 0) { + generic_err(leaf, slot, "invalid root id 0 for reloc tree"); + return -EUCLEAN; + } + return 0; +} + static int check_dir_item(struct extent_buffer *leaf, struct btrfs_key *key, struct btrfs_key *prev_key, int slot) @@ -386,12 +484,14 @@ static int check_dir_item(struct extent_buffer *leaf, return -EUCLEAN; di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); while (cur < item_size) { + struct btrfs_key location_key; u32 name_len; u32 data_len; u32 max_name_len; u32 total_size; u32 name_hash; u8 dir_type; + int ret; /* header itself should not cross item boundary */ if (cur + sizeof(*di) > item_size) { @@ -401,6 +501,25 @@ static int check_dir_item(struct extent_buffer *leaf, return -EUCLEAN; } + /* Location key check */ + btrfs_dir_item_key_to_cpu(leaf, di, &location_key); + if (location_key.type == BTRFS_ROOT_ITEM_KEY) { + ret = check_root_key(leaf, &location_key, slot); + if (ret < 0) + return ret; + } else if (location_key.type == BTRFS_INODE_ITEM_KEY || + location_key.type == 0) { + ret = check_inode_key(leaf, &location_key, slot); + if (ret < 0) + return ret; + } else { + dir_item_err(leaf, slot, + "invalid location key type, have %u, expect %u or %u", + location_key.type, BTRFS_ROOT_ITEM_KEY, + BTRFS_INODE_ITEM_KEY); + return -EUCLEAN; + } + /* dir type check */ dir_type = btrfs_dir_type(leaf, di); if (dir_type >= BTRFS_FT_MAX) { @@ -738,6 +857,44 @@ int btrfs_check_chunk_valid(struct extent_buffer *leaf, return 0; } +/* + * Enhanced version of chunk item checker. + * + * The common btrfs_check_chunk_valid() doesn't check item size since it needs + * to work on super block sys_chunk_array which doesn't have full item ptr. + */ +static int check_leaf_chunk_item(struct extent_buffer *leaf, + struct btrfs_chunk *chunk, + struct btrfs_key *key, int slot) +{ + int num_stripes; + + if (btrfs_item_size_nr(leaf, slot) < sizeof(struct btrfs_chunk)) { + chunk_err(leaf, chunk, key->offset, + "invalid chunk item size: have %u expect [%zu, %u)", + btrfs_item_size_nr(leaf, slot), + sizeof(struct btrfs_chunk), + BTRFS_LEAF_DATA_SIZE(leaf->fs_info)); + return -EUCLEAN; + } + + num_stripes = btrfs_chunk_num_stripes(leaf, chunk); + /* Let btrfs_check_chunk_valid() handle this error type */ + if (num_stripes == 0) + goto out; + + if (btrfs_chunk_item_size(num_stripes) != + btrfs_item_size_nr(leaf, slot)) { + chunk_err(leaf, chunk, key->offset, + "invalid chunk item size: have %u expect %lu", + btrfs_item_size_nr(leaf, slot), + btrfs_chunk_item_size(num_stripes)); + return -EUCLEAN; + } +out: + return btrfs_check_chunk_valid(leaf, chunk, key->offset); +} + __printf(3, 4) __cold static void dev_item_err(const struct extent_buffer *eb, int slot, @@ -801,7 +958,7 @@ static int check_dev_item(struct extent_buffer *leaf, } /* Inode item error output has the same format as dir_item_err() */ -#define inode_item_err(fs_info, eb, slot, fmt, ...) \ +#define inode_item_err(eb, slot, fmt, ...) \ dir_item_err(eb, slot, fmt, __VA_ARGS__) static int check_inode_item(struct extent_buffer *leaf, @@ -812,30 +969,17 @@ static int check_inode_item(struct extent_buffer *leaf, u64 super_gen = btrfs_super_generation(fs_info->super_copy); u32 valid_mask = (S_IFMT | S_ISUID | S_ISGID | S_ISVTX | 0777); u32 mode; + int ret; + + ret = check_inode_key(leaf, key, slot); + if (ret < 0) + return ret; - if ((key->objectid < BTRFS_FIRST_FREE_OBJECTID || - key->objectid > BTRFS_LAST_FREE_OBJECTID) && - key->objectid != BTRFS_ROOT_TREE_DIR_OBJECTID && - key->objectid != BTRFS_FREE_INO_OBJECTID) { - generic_err(leaf, slot, - "invalid key objectid: has %llu expect %llu or [%llu, %llu] or %llu", - key->objectid, BTRFS_ROOT_TREE_DIR_OBJECTID, - BTRFS_FIRST_FREE_OBJECTID, - BTRFS_LAST_FREE_OBJECTID, - BTRFS_FREE_INO_OBJECTID); - return -EUCLEAN; - } - if (key->offset != 0) { - inode_item_err(fs_info, leaf, slot, - "invalid key offset: has %llu expect 0", - key->offset); - return -EUCLEAN; - } iitem = btrfs_item_ptr(leaf, slot, struct btrfs_inode_item); /* Here we use super block generation + 1 to handle log tree */ if (btrfs_inode_generation(leaf, iitem) > super_gen + 1) { - inode_item_err(fs_info, leaf, slot, + inode_item_err(leaf, slot, "invalid inode generation: has %llu expect (0, %llu]", btrfs_inode_generation(leaf, iitem), super_gen + 1); @@ -843,7 +987,7 @@ static int check_inode_item(struct extent_buffer *leaf, } /* Note for ROOT_TREE_DIR_ITEM, mkfs could set its transid 0 */ if (btrfs_inode_transid(leaf, iitem) > super_gen + 1) { - inode_item_err(fs_info, leaf, slot, + inode_item_err(leaf, slot, "invalid inode generation: has %llu expect [0, %llu]", btrfs_inode_transid(leaf, iitem), super_gen + 1); return -EUCLEAN; @@ -856,7 +1000,7 @@ static int check_inode_item(struct extent_buffer *leaf, */ mode = btrfs_inode_mode(leaf, iitem); if (mode & ~valid_mask) { - inode_item_err(fs_info, leaf, slot, + inode_item_err(leaf, slot, "unknown mode bit detected: 0x%x", mode & ~valid_mask); return -EUCLEAN; @@ -869,20 +1013,20 @@ static int check_inode_item(struct extent_buffer *leaf, */ if (!has_single_bit_set(mode & S_IFMT)) { if (!S_ISLNK(mode) && !S_ISBLK(mode) && !S_ISSOCK(mode)) { - inode_item_err(fs_info, leaf, slot, + inode_item_err(leaf, slot, "invalid mode: has 0%o expect valid S_IF* bit(s)", mode & S_IFMT); return -EUCLEAN; } } if (S_ISDIR(mode) && btrfs_inode_nlink(leaf, iitem) > 1) { - inode_item_err(fs_info, leaf, slot, + inode_item_err(leaf, slot, "invalid nlink: has %u expect no more than 1 for dir", btrfs_inode_nlink(leaf, iitem)); return -EUCLEAN; } if (btrfs_inode_flags(leaf, iitem) & ~BTRFS_INODE_FLAG_MASK) { - inode_item_err(fs_info, leaf, slot, + inode_item_err(leaf, slot, "unknown flags detected: 0x%llx", btrfs_inode_flags(leaf, iitem) & ~BTRFS_INODE_FLAG_MASK); @@ -898,22 +1042,11 @@ static int check_root_item(struct extent_buffer *leaf, struct btrfs_key *key, struct btrfs_root_item ri; const u64 valid_root_flags = BTRFS_ROOT_SUBVOL_RDONLY | BTRFS_ROOT_SUBVOL_DEAD; + int ret; - /* No such tree id */ - if (key->objectid == 0) { - generic_err(leaf, slot, "invalid root id 0"); - return -EUCLEAN; - } - - /* - * Some older kernel may create ROOT_ITEM with non-zero offset, so here - * we only check offset for reloc tree whose key->offset must be a - * valid tree. - */ - if (key->objectid == BTRFS_TREE_RELOC_OBJECTID && key->offset == 0) { - generic_err(leaf, slot, "invalid root id 0 for reloc tree"); - return -EUCLEAN; - } + ret = check_root_key(leaf, key, slot); + if (ret < 0) + return ret; if (btrfs_item_size_nr(leaf, slot) != sizeof(ri)) { generic_err(leaf, slot, @@ -1302,8 +1435,8 @@ static int check_extent_data_ref(struct extent_buffer *leaf, return 0; } -#define inode_ref_err(fs_info, eb, slot, fmt, args...) \ - inode_item_err(fs_info, eb, slot, fmt, ##args) +#define inode_ref_err(eb, slot, fmt, args...) \ + inode_item_err(eb, slot, fmt, ##args) static int check_inode_ref(struct extent_buffer *leaf, struct btrfs_key *key, struct btrfs_key *prev_key, int slot) @@ -1316,7 +1449,7 @@ static int check_inode_ref(struct extent_buffer *leaf, return -EUCLEAN; /* namelen can't be 0, so item_size == sizeof() is also invalid */ if (btrfs_item_size_nr(leaf, slot) <= sizeof(*iref)) { - inode_ref_err(fs_info, leaf, slot, + inode_ref_err(leaf, slot, "invalid item size, have %u expect (%zu, %u)", btrfs_item_size_nr(leaf, slot), sizeof(*iref), BTRFS_LEAF_DATA_SIZE(leaf->fs_info)); @@ -1329,7 +1462,7 @@ static int check_inode_ref(struct extent_buffer *leaf, u16 namelen; if (ptr + sizeof(iref) > end) { - inode_ref_err(fs_info, leaf, slot, + inode_ref_err(leaf, slot, "inode ref overflow, ptr %lu end %lu inode_ref_size %zu", ptr, end, sizeof(iref)); return -EUCLEAN; @@ -1338,7 +1471,7 @@ static int check_inode_ref(struct extent_buffer *leaf, iref = (struct btrfs_inode_ref *)ptr; namelen = btrfs_inode_ref_name_len(leaf, iref); if (ptr + sizeof(*iref) + namelen > end) { - inode_ref_err(fs_info, leaf, slot, + inode_ref_err(leaf, slot, "inode ref overflow, ptr %lu end %lu namelen %u", ptr, end, namelen); return -EUCLEAN; @@ -1384,7 +1517,7 @@ static int check_leaf_item(struct extent_buffer *leaf, break; case BTRFS_CHUNK_ITEM_KEY: chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); - ret = btrfs_check_chunk_valid(leaf, chunk, key->offset); + ret = check_leaf_chunk_item(leaf, chunk, key, slot); break; case BTRFS_DEV_ITEM_KEY: ret = check_dev_item(leaf, key, slot); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index d3f115909ff05a..a2bae5c230e19c 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2674,14 +2674,9 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, u32 blocksize; int ret = 0; - WARN_ON(*level < 0); - WARN_ON(*level >= BTRFS_MAX_LEVEL); - while (*level > 0) { struct btrfs_key first_key; - WARN_ON(*level < 0); - WARN_ON(*level >= BTRFS_MAX_LEVEL); cur = path->nodes[*level]; WARN_ON(btrfs_header_level(cur) != *level); @@ -2732,9 +2727,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); - ret = btrfs_free_and_pin_reserved_extent( - fs_info, bytenr, - blocksize); + ret = btrfs_pin_reserved_extent(fs_info, + bytenr, blocksize); if (ret) { free_extent_buffer(next); return ret; @@ -2749,7 +2743,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, return ret; } - WARN_ON(*level <= 0); if (path->nodes[*level-1]) free_extent_buffer(path->nodes[*level-1]); path->nodes[*level-1] = next; @@ -2757,9 +2750,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, path->slots[*level] = 0; cond_resched(); } - WARN_ON(*level < 0); - WARN_ON(*level >= BTRFS_MAX_LEVEL); - path->slots[*level] = btrfs_header_nritems(path->nodes[*level]); cond_resched(); @@ -2815,8 +2805,7 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, } WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); - ret = btrfs_free_and_pin_reserved_extent( - fs_info, + ret = btrfs_pin_reserved_extent(fs_info, path->nodes[*level]->start, path->nodes[*level]->len); if (ret) @@ -2896,10 +2885,8 @@ static int walk_log_tree(struct btrfs_trans_handle *trans, clear_extent_buffer_dirty(next); } - WARN_ON(log->root_key.objectid != - BTRFS_TREE_LOG_OBJECTID); - ret = btrfs_free_and_pin_reserved_extent(fs_info, - next->start, next->len); + ret = btrfs_pin_reserved_extent(fs_info, next->start, + next->len); if (ret) goto out; } @@ -3935,7 +3922,7 @@ static int log_csums(struct btrfs_trans_handle *trans, static noinline int copy_items(struct btrfs_trans_handle *trans, struct btrfs_inode *inode, struct btrfs_path *dst_path, - struct btrfs_path *src_path, u64 *last_extent, + struct btrfs_path *src_path, int start_slot, int nr, int inode_only, u64 logged_isize) { @@ -3946,7 +3933,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, struct btrfs_file_extent_item *extent; struct btrfs_inode_item *inode_item; struct extent_buffer *src = src_path->nodes[0]; - struct btrfs_key first_key, last_key, key; int ret; struct btrfs_key *ins_keys; u32 *ins_sizes; @@ -3954,9 +3940,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, int i; struct list_head ordered_sums; int skip_csum = inode->flags & BTRFS_INODE_NODATASUM; - bool has_extents = false; - bool need_find_last_extent = true; - bool done = false; INIT_LIST_HEAD(&ordered_sums); @@ -3965,8 +3948,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, if (!ins_data) return -ENOMEM; - first_key.objectid = (u64)-1; - ins_sizes = (u32 *)ins_data; ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32)); @@ -3987,9 +3968,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, src_offset = btrfs_item_ptr_offset(src, start_slot + i); - if (i == nr - 1) - last_key = ins_keys[i]; - if (ins_keys[i].type == BTRFS_INODE_ITEM_KEY) { inode_item = btrfs_item_ptr(dst_path->nodes[0], dst_path->slots[0], @@ -4003,20 +3981,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, src_offset, ins_sizes[i]); } - /* - * We set need_find_last_extent here in case we know we were - * processing other items and then walk into the first extent in - * the inode. If we don't hit an extent then nothing changes, - * we'll do the last search the next time around. - */ - if (ins_keys[i].type == BTRFS_EXTENT_DATA_KEY) { - has_extents = true; - if (first_key.objectid == (u64)-1) - first_key = ins_keys[i]; - } else { - need_find_last_extent = false; - } - /* take a reference on file data extents so that truncates * or deletes of this inode don't have to relog the inode * again @@ -4082,167 +4046,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, kfree(sums); } - if (!has_extents) - return ret; - - if (need_find_last_extent && *last_extent == first_key.offset) { - /* - * We don't have any leafs between our current one and the one - * we processed before that can have file extent items for our - * inode (and have a generation number smaller than our current - * transaction id). - */ - need_find_last_extent = false; - } - - /* - * Because we use btrfs_search_forward we could skip leaves that were - * not modified and then assume *last_extent is valid when it really - * isn't. So back up to the previous leaf and read the end of the last - * extent before we go and fill in holes. - */ - if (need_find_last_extent) { - u64 len; - - ret = btrfs_prev_leaf(inode->root, src_path); - if (ret < 0) - return ret; - if (ret) - goto fill_holes; - if (src_path->slots[0]) - src_path->slots[0]--; - src = src_path->nodes[0]; - btrfs_item_key_to_cpu(src, &key, src_path->slots[0]); - if (key.objectid != btrfs_ino(inode) || - key.type != BTRFS_EXTENT_DATA_KEY) - goto fill_holes; - extent = btrfs_item_ptr(src, src_path->slots[0], - struct btrfs_file_extent_item); - if (btrfs_file_extent_type(src, extent) == - BTRFS_FILE_EXTENT_INLINE) { - len = btrfs_file_extent_ram_bytes(src, extent); - *last_extent = ALIGN(key.offset + len, - fs_info->sectorsize); - } else { - len = btrfs_file_extent_num_bytes(src, extent); - *last_extent = key.offset + len; - } - } -fill_holes: - /* So we did prev_leaf, now we need to move to the next leaf, but a few - * things could have happened - * - * 1) A merge could have happened, so we could currently be on a leaf - * that holds what we were copying in the first place. - * 2) A split could have happened, and now not all of the items we want - * are on the same leaf. - * - * So we need to adjust how we search for holes, we need to drop the - * path and re-search for the first extent key we found, and then walk - * forward until we hit the last one we copied. - */ - if (need_find_last_extent) { - /* btrfs_prev_leaf could return 1 without releasing the path */ - btrfs_release_path(src_path); - ret = btrfs_search_slot(NULL, inode->root, &first_key, - src_path, 0, 0); - if (ret < 0) - return ret; - ASSERT(ret == 0); - src = src_path->nodes[0]; - i = src_path->slots[0]; - } else { - i = start_slot; - } - - /* - * Ok so here we need to go through and fill in any holes we may have - * to make sure that holes are punched for those areas in case they had - * extents previously. - */ - while (!done) { - u64 offset, len; - u64 extent_end; - - if (i >= btrfs_header_nritems(src_path->nodes[0])) { - ret = btrfs_next_leaf(inode->root, src_path); - if (ret < 0) - return ret; - ASSERT(ret == 0); - src = src_path->nodes[0]; - i = 0; - need_find_last_extent = true; - } - - btrfs_item_key_to_cpu(src, &key, i); - if (!btrfs_comp_cpu_keys(&key, &last_key)) - done = true; - if (key.objectid != btrfs_ino(inode) || - key.type != BTRFS_EXTENT_DATA_KEY) { - i++; - continue; - } - extent = btrfs_item_ptr(src, i, struct btrfs_file_extent_item); - if (btrfs_file_extent_type(src, extent) == - BTRFS_FILE_EXTENT_INLINE) { - len = btrfs_file_extent_ram_bytes(src, extent); - extent_end = ALIGN(key.offset + len, - fs_info->sectorsize); - } else { - len = btrfs_file_extent_num_bytes(src, extent); - extent_end = key.offset + len; - } - i++; - - if (*last_extent == key.offset) { - *last_extent = extent_end; - continue; - } - offset = *last_extent; - len = key.offset - *last_extent; - ret = btrfs_insert_file_extent(trans, log, btrfs_ino(inode), - offset, 0, 0, len, 0, len, 0, 0, 0); - if (ret) - break; - *last_extent = extent_end; - } - - /* - * Check if there is a hole between the last extent found in our leaf - * and the first extent in the next leaf. If there is one, we need to - * log an explicit hole so that at replay time we can punch the hole. - */ - if (ret == 0 && - key.objectid == btrfs_ino(inode) && - key.type == BTRFS_EXTENT_DATA_KEY && - i == btrfs_header_nritems(src_path->nodes[0])) { - ret = btrfs_next_leaf(inode->root, src_path); - need_find_last_extent = true; - if (ret > 0) { - ret = 0; - } else if (ret == 0) { - btrfs_item_key_to_cpu(src_path->nodes[0], &key, - src_path->slots[0]); - if (key.objectid == btrfs_ino(inode) && - key.type == BTRFS_EXTENT_DATA_KEY && - *last_extent < key.offset) { - const u64 len = key.offset - *last_extent; - - ret = btrfs_insert_file_extent(trans, log, - btrfs_ino(inode), - *last_extent, 0, - 0, len, 0, len, - 0, 0, 0); - *last_extent += len; - } - } - } - /* - * Need to let the callers know we dropped the path so they should - * re-search. - */ - if (!ret && need_find_last_extent) - ret = 1; return ret; } @@ -4407,7 +4210,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, const u64 i_size = i_size_read(&inode->vfs_inode); const u64 ino = btrfs_ino(inode); struct btrfs_path *dst_path = NULL; - u64 last_extent = (u64)-1; + bool dropped_extents = false; int ins_nr = 0; int start_slot; int ret; @@ -4429,8 +4232,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, if (slot >= btrfs_header_nritems(leaf)) { if (ins_nr > 0) { ret = copy_items(trans, inode, dst_path, path, - &last_extent, start_slot, - ins_nr, 1, 0); + start_slot, ins_nr, 1, 0); if (ret < 0) goto out; ins_nr = 0; @@ -4454,8 +4256,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, path->slots[0]++; continue; } - if (last_extent == (u64)-1) { - last_extent = key.offset; + if (!dropped_extents) { /* * Avoid logging extent items logged in past fsync calls * and leading to duplicate keys in the log tree. @@ -4469,6 +4270,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, } while (ret == -EAGAIN); if (ret) goto out; + dropped_extents = true; } if (ins_nr == 0) start_slot = slot; @@ -4483,7 +4285,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, } } if (ins_nr > 0) { - ret = copy_items(trans, inode, dst_path, path, &last_extent, + ret = copy_items(trans, inode, dst_path, path, start_slot, ins_nr, 1, 0); if (ret > 0) ret = 0; @@ -4670,13 +4472,8 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans, if (slot >= nritems) { if (ins_nr > 0) { - u64 last_extent = 0; - ret = copy_items(trans, inode, dst_path, path, - &last_extent, start_slot, - ins_nr, 1, 0); - /* can't be 1, extent items aren't processed */ - ASSERT(ret <= 0); + start_slot, ins_nr, 1, 0); if (ret < 0) return ret; ins_nr = 0; @@ -4700,13 +4497,8 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans, cond_resched(); } if (ins_nr > 0) { - u64 last_extent = 0; - ret = copy_items(trans, inode, dst_path, path, - &last_extent, start_slot, - ins_nr, 1, 0); - /* can't be 1, extent items aren't processed */ - ASSERT(ret <= 0); + start_slot, ins_nr, 1, 0); if (ret < 0) return ret; } @@ -4715,100 +4507,119 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans, } /* - * If the no holes feature is enabled we need to make sure any hole between the - * last extent and the i_size of our inode is explicitly marked in the log. This - * is to make sure that doing something like: - * - * 1) create file with 128Kb of data - * 2) truncate file to 64Kb - * 3) truncate file to 256Kb - * 4) fsync file - * 5) - * 6) mount fs and trigger log replay - * - * Will give us a file with a size of 256Kb, the first 64Kb of data match what - * the file had in its first 64Kb of data at step 1 and the last 192Kb of the - * file correspond to a hole. The presence of explicit holes in a log tree is - * what guarantees that log replay will remove/adjust file extent items in the - * fs/subvol tree. - * - * Here we do not need to care about holes between extents, that is already done - * by copy_items(). We also only need to do this in the full sync path, where we - * lookup for extents from the fs/subvol tree only. In the fast path case, we - * lookup the list of modified extent maps and if any represents a hole, we - * insert a corresponding extent representing a hole in the log tree. + * When using the NO_HOLES feature if we punched a hole that causes the + * deletion of entire leafs or all the extent items of the first leaf (the one + * that contains the inode item and references) we may end up not processing + * any extents, because there are no leafs with a generation matching the + * current transaction that have extent items for our inode. So we need to find + * if any holes exist and then log them. We also need to log holes after any + * truncate operation that changes the inode's size. */ -static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_inode *inode, - struct btrfs_path *path) +static int btrfs_log_holes(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_inode *inode, + struct btrfs_path *path) { struct btrfs_fs_info *fs_info = root->fs_info; - int ret; struct btrfs_key key; - u64 hole_start; - u64 hole_size; - struct extent_buffer *leaf; - struct btrfs_root *log = root->log_root; const u64 ino = btrfs_ino(inode); const u64 i_size = i_size_read(&inode->vfs_inode); + u64 prev_extent_end = 0; + int ret; - if (!btrfs_fs_incompat(fs_info, NO_HOLES)) + if (!btrfs_fs_incompat(fs_info, NO_HOLES) || i_size == 0) return 0; key.objectid = ino; key.type = BTRFS_EXTENT_DATA_KEY; - key.offset = (u64)-1; + key.offset = 0; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - ASSERT(ret != 0); if (ret < 0) return ret; - ASSERT(path->slots[0] > 0); - path->slots[0]--; - leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); - - if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY) { - /* inode does not have any extents */ - hole_start = 0; - hole_size = i_size; - } else { + while (true) { struct btrfs_file_extent_item *extent; + struct extent_buffer *leaf = path->nodes[0]; u64 len; - /* - * If there's an extent beyond i_size, an explicit hole was - * already inserted by copy_items(). - */ - if (key.offset >= i_size) - return 0; + if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) + return ret; + if (ret > 0) { + ret = 0; + break; + } + leaf = path->nodes[0]; + } + + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY) + break; + + /* We have a hole, log it. */ + if (prev_extent_end < key.offset) { + const u64 hole_len = key.offset - prev_extent_end; + + /* + * Release the path to avoid deadlocks with other code + * paths that search the root while holding locks on + * leafs from the log root. + */ + btrfs_release_path(path); + ret = btrfs_insert_file_extent(trans, root->log_root, + ino, prev_extent_end, 0, + 0, hole_len, 0, hole_len, + 0, 0, 0); + if (ret < 0) + return ret; + + /* + * Search for the same key again in the root. Since it's + * an extent item and we are holding the inode lock, the + * key must still exist. If it doesn't just emit warning + * and return an error to fall back to a transaction + * commit. + */ + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + return ret; + if (WARN_ON(ret > 0)) + return -ENOENT; + leaf = path->nodes[0]; + } extent = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); - if (btrfs_file_extent_type(leaf, extent) == - BTRFS_FILE_EXTENT_INLINE) - return 0; + BTRFS_FILE_EXTENT_INLINE) { + len = btrfs_file_extent_ram_bytes(leaf, extent); + prev_extent_end = ALIGN(key.offset + len, + fs_info->sectorsize); + } else { + len = btrfs_file_extent_num_bytes(leaf, extent); + prev_extent_end = key.offset + len; + } - len = btrfs_file_extent_num_bytes(leaf, extent); - /* Last extent goes beyond i_size, no need to log a hole. */ - if (key.offset + len > i_size) - return 0; - hole_start = key.offset + len; - hole_size = i_size - hole_start; + path->slots[0]++; + cond_resched(); } - btrfs_release_path(path); - /* Last extent ends at i_size. */ - if (hole_size == 0) - return 0; + if (prev_extent_end < i_size) { + u64 hole_len; - hole_size = ALIGN(hole_size, fs_info->sectorsize); - ret = btrfs_insert_file_extent(trans, log, ino, hole_start, 0, 0, - hole_size, 0, hole_size, 0, 0, 0); - return ret; + btrfs_release_path(path); + hole_len = ALIGN(i_size - prev_extent_end, fs_info->sectorsize); + ret = btrfs_insert_file_extent(trans, root->log_root, + ino, prev_extent_end, 0, 0, + hole_len, 0, hole_len, + 0, 0, 0); + if (ret < 0) + return ret; + } + + return 0; } /* @@ -5110,7 +4921,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, struct btrfs_key min_key; struct btrfs_key max_key; struct btrfs_root *log = root->log_root; - u64 last_extent = 0; int err = 0; int ret; int nritems; @@ -5288,7 +5098,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, ins_start_slot = path->slots[0]; } ret = copy_items(trans, inode, dst_path, path, - &last_extent, ins_start_slot, + ins_start_slot, ins_nr, inode_only, logged_isize); if (ret < 0) { @@ -5311,17 +5121,13 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, if (ins_nr == 0) goto next_slot; ret = copy_items(trans, inode, dst_path, path, - &last_extent, ins_start_slot, + ins_start_slot, ins_nr, inode_only, logged_isize); if (ret < 0) { err = ret; goto out_unlock; } ins_nr = 0; - if (ret) { - btrfs_release_path(path); - continue; - } goto next_slot; } @@ -5334,18 +5140,13 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, goto next_slot; } - ret = copy_items(trans, inode, dst_path, path, &last_extent, + ret = copy_items(trans, inode, dst_path, path, ins_start_slot, ins_nr, inode_only, logged_isize); if (ret < 0) { err = ret; goto out_unlock; } - if (ret) { - ins_nr = 0; - btrfs_release_path(path); - continue; - } ins_nr = 1; ins_start_slot = path->slots[0]; next_slot: @@ -5359,13 +5160,12 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, } if (ins_nr) { ret = copy_items(trans, inode, dst_path, path, - &last_extent, ins_start_slot, + ins_start_slot, ins_nr, inode_only, logged_isize); if (ret < 0) { err = ret; goto out_unlock; } - ret = 0; ins_nr = 0; } btrfs_release_path(path); @@ -5380,14 +5180,13 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, } } if (ins_nr) { - ret = copy_items(trans, inode, dst_path, path, &last_extent, + ret = copy_items(trans, inode, dst_path, path, ins_start_slot, ins_nr, inode_only, logged_isize); if (ret < 0) { err = ret; goto out_unlock; } - ret = 0; ins_nr = 0; } @@ -5400,7 +5199,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) { btrfs_release_path(path); btrfs_release_path(dst_path); - err = btrfs_log_trailing_hole(trans, root, inode, path); + err = btrfs_log_holes(trans, root, inode, path); if (err) goto out_unlock; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index a6d3f08bfff37b..66377e678504c3 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -66,6 +66,7 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = { .tolerated_failures = 2, .devs_increment = 3, .ncopies = 3, + .nparity = 0, .raid_name = "raid1c3", .bg_flag = BTRFS_BLOCK_GROUP_RAID1C3, .mindev_error = BTRFS_ERROR_DEV_RAID1C3_MIN_NOT_MET, @@ -78,6 +79,7 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = { .tolerated_failures = 3, .devs_increment = 4, .ncopies = 4, + .nparity = 0, .raid_name = "raid1c4", .bg_flag = BTRFS_BLOCK_GROUP_RAID1C4, .mindev_error = BTRFS_ERROR_DEV_RAID1C4_MIN_NOT_MET, @@ -1064,11 +1066,6 @@ static void btrfs_close_bdev(struct btrfs_device *device) static void btrfs_close_one_device(struct btrfs_device *device) { struct btrfs_fs_devices *fs_devices = device->fs_devices; - struct btrfs_device *new_device; - struct rcu_string *name; - - if (device->bdev) - fs_devices->open_devices--; if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) && device->devid != BTRFS_DEV_REPLACE_DEVID) { @@ -1080,23 +1077,21 @@ static void btrfs_close_one_device(struct btrfs_device *device) fs_devices->missing_devices--; btrfs_close_bdev(device); - - new_device = btrfs_alloc_device(NULL, &device->devid, - device->uuid); - BUG_ON(IS_ERR(new_device)); /* -ENOMEM */ - - /* Safe because we are under uuid_mutex */ - if (device->name) { - name = rcu_string_strdup(device->name->str, GFP_NOFS); - BUG_ON(!name); /* -ENOMEM */ - rcu_assign_pointer(new_device->name, name); + if (device->bdev) { + fs_devices->open_devices--; + device->bdev = NULL; } + clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state); - list_replace_rcu(&device->dev_list, &new_device->dev_list); - new_device->fs_devices = device->fs_devices; + atomic_set(&device->dev_stats_ccnt, 0); + extent_io_tree_release(&device->alloc_state); - synchronize_rcu(); - btrfs_free_device(device); + /* Verify the device is back in a pristine state */ + ASSERT(!test_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state)); + ASSERT(!test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)); + ASSERT(list_empty(&device->dev_alloc_list)); + ASSERT(list_empty(&device->post_commit_list)); + ASSERT(atomic_read(&device->reada_in_flight) == 0); } static int close_fs_devices(struct btrfs_fs_devices *fs_devices) @@ -2130,7 +2125,6 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev) { struct btrfs_fs_devices *fs_devices = tgtdev->fs_info->fs_devices; - WARN_ON(!tgtdev); mutex_lock(&fs_devices->device_list_mutex); btrfs_sysfs_rm_device_link(fs_devices, tgtdev); @@ -6476,19 +6470,14 @@ static u64 calc_stripe_length(u64 type, u64 chunk_len, int num_stripes) { int index = btrfs_bg_flags_to_raid_index(type); int ncopies = btrfs_raid_array[index].ncopies; + const int nparity = btrfs_raid_array[index].nparity; int data_stripes; - switch (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) { - case BTRFS_BLOCK_GROUP_RAID5: - data_stripes = num_stripes - 1; - break; - case BTRFS_BLOCK_GROUP_RAID6: - data_stripes = num_stripes - 2; - break; - default: + if (nparity) + data_stripes = num_stripes - nparity; + else data_stripes = num_stripes / ncopies; - break; - } + return div_u64(chunk_len, data_stripes); } diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index fc1b564b9cfe57..3c56ef571b00be 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -255,7 +255,7 @@ struct btrfs_fs_devices { struct btrfs_fs_info *fs_info; /* sysfs kobjects */ struct kobject fsid_kobj; - struct kobject *device_dir_kobj; + struct kobject *devices_kobj; struct completion kobj_unregister; }; diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 620bf1b38fba78..17088a112ed0bb 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -496,9 +496,9 @@ DECLARE_EVENT_CLASS(btrfs__ordered_extent, TP_fast_assign_btrfs(btrfs_sb(inode->i_sb), __entry->ino = btrfs_ino(BTRFS_I(inode)); __entry->file_offset = ordered->file_offset; - __entry->start = ordered->start; - __entry->len = ordered->len; - __entry->disk_len = ordered->disk_len; + __entry->start = ordered->disk_bytenr; + __entry->len = ordered->num_bytes; + __entry->disk_len = ordered->disk_num_bytes; __entry->bytes_left = ordered->bytes_left; __entry->flags = ordered->flags; __entry->compress_type = ordered->compress_type;