aboutsummaryrefslogtreecommitdiffstats
path: root/fs/nfs/blocklayout/blocklayoutdev.c
diff options
context:
space:
mode:
authorChristoph Hellwig <hch@lst.de>2014-09-10 08:23:34 -0700
committerTrond Myklebust <trond.myklebust@primarydata.com>2014-09-10 12:47:03 -0700
commit8067253c8cc531b6f367b9f5942bdc6168385701 (patch)
tree78d7663906883132df08a2a1aecf6e385fd766d1 /fs/nfs/blocklayout/blocklayoutdev.c
parent8c792ea940499153732adea2ea4ca37f6999778f (diff)
pnfs/blocklayout: rewrite extent tracking
Currently the block layout driver tracks extents in three separate data structures: - the two lists of pnfs_block_extent structures returned by the server - the list of sectors that were in invalid state but have been written to - a list of pnfs_block_short_extent structures for LAYOUTCOMMIT All of these share the property that they are not only highly inefficient data structures, but also that operations on them are even more inefficient than necessary. In addition there are various implementation defects like: - using an int to track sectors, causing corruption for large offsets - incorrect normalization of page or block granularity ranges - insufficient error handling - incorrect synchronization as extents can be modified while they are in use This patch replaces all three data structures with a single unified rbtree structure tracking all extents, as well as their in-memory state, although we still need two instances for read-only and read-write extents due to the arcane client side COW feature in the block layouts spec. To fix the problem of extents possibly being modified while in use we make sure to return a copy of the extent for use in the write path - the extent can only be invalidated by a layout recall or return which has to wait until the I/O operations have finished due to refcounts on the layout segment. The new extent tree works similarly to the schemes used by block based filesystems like XFS or ext4. Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Diffstat (limited to 'fs/nfs/blocklayout/blocklayoutdev.c')
-rw-r--r--fs/nfs/blocklayout/blocklayoutdev.c35
1 files changed, 13 insertions, 22 deletions
diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c
index 63f77925aa87..cd71b5e231ec 100644
--- a/fs/nfs/blocklayout/blocklayoutdev.c
+++ b/fs/nfs/blocklayout/blocklayoutdev.c
@@ -309,7 +309,7 @@ nfs4_blk_process_layoutget(struct pnfs_layout_hdr *lo,
* recovery easier.
*/
for (i = 0; i < count; i++) {
- be = bl_alloc_extent();
+ be = kzalloc(sizeof(struct pnfs_block_extent), GFP_NOFS);
if (!be) {
status = -ENOMEM;
goto out_err;
@@ -330,13 +330,11 @@ nfs4_blk_process_layoutget(struct pnfs_layout_hdr *lo,
if (decode_sector_number(&p, &be->be_v_offset) < 0)
goto out_err;
be->be_state = be32_to_cpup(p++);
- if (be->be_state == PNFS_BLOCK_INVALID_DATA)
- be->be_inval = &bl->bl_inval;
if (verify_extent(be, &lv)) {
dprintk("%s verify failed\n", __func__);
goto out_err;
}
- list_add_tail(&be->be_node, &extents);
+ list_add_tail(&be->be_list, &extents);
}
if (lgr->range.offset + lgr->range.length !=
lv.start << SECTOR_SHIFT) {
@@ -352,21 +350,13 @@ nfs4_blk_process_layoutget(struct pnfs_layout_hdr *lo,
/* Extents decoded properly, now try to merge them in to
* existing layout extents.
*/
- spin_lock(&bl->bl_ext_lock);
- list_for_each_entry_safe(be, save, &extents, be_node) {
- list_del(&be->be_node);
- status = bl_add_merge_extent(bl, be);
- if (status) {
- spin_unlock(&bl->bl_ext_lock);
- /* This is a fairly catastrophic error, as the
- * entire layout extent lists are now corrupted.
- * We should have some way to distinguish this.
- */
- be = NULL;
- goto out_err;
- }
+ list_for_each_entry_safe(be, save, &extents, be_list) {
+ list_del(&be->be_list);
+
+ status = ext_tree_insert(bl, be);
+ if (status)
+ goto out_free_list;
}
- spin_unlock(&bl->bl_ext_lock);
status = 0;
out:
__free_page(scratch);
@@ -374,12 +364,13 @@ nfs4_blk_process_layoutget(struct pnfs_layout_hdr *lo,
return status;
out_err:
- bl_put_extent(be);
+ kfree(be);
+ out_free_list:
while (!list_empty(&extents)) {
be = list_first_entry(&extents, struct pnfs_block_extent,
- be_node);
- list_del(&be->be_node);
- bl_put_extent(be);
+ be_list);
+ list_del(&be->be_list);
+ kfree(be);
}
goto out;
}

Privacy Policy