STABLE2 - gfs-kernel: bz298931 - GFS unlinked inode metadata leak

Abhijith Das adas@fedoraproject.org
Fri Sep 12 21:48:00 GMT 2008


Gitweb:        http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=f8d0cbc5b06340085e077d5b41e0b226732332af
Commit:        f8d0cbc5b06340085e077d5b41e0b226732332af
Parent:        707bb488158fb80a4166dc10197b017a222bab55
Author:        Abhijith Das <adas@redhat.com>
AuthorDate:    Fri Sep 12 16:44:04 2008 -0500
Committer:     Abhijith Das <adas@redhat.com>
CommitterDate: Fri Sep 12 16:44:51 2008 -0500

gfs-kernel: bz298931 - GFS unlinked inode metadata leak

Have inoded reclaim metadata from a limited number of rgrps at a time

The tunable max_rgrp_free_mdata sets the maximum number of rgrps
from which unused metadata is freed during each cycle of inoded.
The default is 5.
---
 gfs-kernel/src/gfs/daemon.c |    4 ++++
 gfs-kernel/src/gfs/incore.h |    4 ++++
 gfs-kernel/src/gfs/ioctl.c  |   10 +++++++---
 gfs-kernel/src/gfs/rgrp.c   |   18 +++++++++++++++---
 gfs-kernel/src/gfs/rgrp.h   |    3 ++-
 gfs-kernel/src/gfs/super.c  |    1 +
 6 files changed, 33 insertions(+), 7 deletions(-)

diff --git a/gfs-kernel/src/gfs/daemon.c b/gfs-kernel/src/gfs/daemon.c
index 378e0a7..c0c8057 100644
--- a/gfs-kernel/src/gfs/daemon.c
+++ b/gfs-kernel/src/gfs/daemon.c
@@ -13,6 +13,7 @@
 #include "recovery.h"
 #include "super.h"
 #include "unlinked.h"
+#include "rgrp.h"
 
 /**
  * gfs_scand - Look for cached glocks and inodes to toss from memory
@@ -173,7 +174,10 @@ gfs_inoded(void *data)
 	struct gfs_sbd *sdp = (struct gfs_sbd *)data;
 
 	while (!kthread_should_stop()) {
+		uint64_t inodes, metadata;
 		gfs_unlinked_dealloc(sdp);
+		gfs_reclaim_metadata(sdp, &inodes, &metadata, 
+				     gfs_tune_get(sdp, gt_max_rgrp_free_mdata));
 		schedule_timeout_interruptible(gfs_tune_get(sdp, gt_inoded_secs) * HZ);
 	}
 
diff --git a/gfs-kernel/src/gfs/incore.h b/gfs-kernel/src/gfs/incore.h
index fedde49..bad76fc 100644
--- a/gfs-kernel/src/gfs/incore.h
+++ b/gfs-kernel/src/gfs/incore.h
@@ -203,6 +203,8 @@ struct gfs_bitmap {
 	uint32_t bi_len;     /* The number of bitmap bytes in this bit block */
 };
 
+#define RD_FL_META2FREE        (0x00000001)  /* rgrpd has freeable metadata */
+
 /*
  *  Resource Group (Rgrp) descriptor structure.
  *  There is one of these for each resource (block) group in the FS.
@@ -236,6 +238,7 @@ struct gfs_rgrpd {
 	struct list_head rd_depend;     /* Dependent inodes (MRU order) */
 
 	struct gfs_sbd *rd_sbd;		/* FS incore superblock (fs instance) */
+	uint32_t rd_flags;              /* RD_FL_XXX : flags for this rgrpd */
 };
 
 /*
@@ -892,6 +895,7 @@ struct gfs_tune {
 	unsigned int gt_greedy_max;
 	unsigned int gt_rgrp_try_threshold;
 	unsigned int gt_statfs_fast;
+	unsigned int gt_max_rgrp_free_mdata; /* Max # of rgrps to free metadata from */
 };
 
 /*
diff --git a/gfs-kernel/src/gfs/ioctl.c b/gfs-kernel/src/gfs/ioctl.c
index 73985df..46517b6 100644
--- a/gfs-kernel/src/gfs/ioctl.c
+++ b/gfs-kernel/src/gfs/ioctl.c
@@ -472,6 +472,7 @@ gi_get_tune(struct gfs_inode *ip,
         gfs_printf("greedy_max %u\n", gt->gt_greedy_max);
         gfs_printf("rgrp_try_threshold %u\n", gt->gt_rgrp_try_threshold);
         gfs_printf("statfs_fast %u\n", gt->gt_statfs_fast);
+        gfs_printf("max_rgrp_free_mdata %u\n", gt->gt_max_rgrp_free_mdata);
 
         error = 0;
 
@@ -759,8 +760,10 @@ gi_set_tune(struct gfs_sbd *sdp, struct gfs_ioctl *gi, int from_user)
 			return error;
 		else
 			tune_set(gt_statfs_fast, x);
-
-
+	} else if (strcmp(param, "max_rgrp_free_mdata") == 0) {
+		if (sscanf(value, "%u", &x) != 1)
+			return -EINVAL;
+		tune_set(gt_max_rgrp_free_mdata, x);
 	} else
 		return -EINVAL;
 
@@ -795,7 +798,8 @@ gi_do_reclaim(struct gfs_inode *ip,
 
 	error = gfs_reclaim_metadata(ip->i_sbd,
 				     &inodes,
-				     &metadata);
+				     &metadata,
+				     ip->i_sbd->sd_rgcount);
 	if (error)
 		return error;
 
diff --git a/gfs-kernel/src/gfs/rgrp.c b/gfs-kernel/src/gfs/rgrp.c
index 947757e..47f8e78 100644
--- a/gfs-kernel/src/gfs/rgrp.c
+++ b/gfs-kernel/src/gfs/rgrp.c
@@ -1522,6 +1522,9 @@ blkfree_internal(struct gfs_sbd *sdp, uint64_t bstart, uint32_t blen,
 			   bits->bi_len, buf_blk, new_state);
 	}
 
+	if (new_state == GFS_BLKST_FREEMETA)
+		rgd->rd_flags |= RD_FL_META2FREE;
+
 	return rgd;
 }
 
@@ -2010,7 +2013,8 @@ gfs_rlist_free(struct gfs_rgrp_list *rlist)
 int
 gfs_reclaim_metadata(struct gfs_sbd *sdp, 
 		     uint64_t *inodes,
-		     uint64_t *metadata)
+		     uint64_t *metadata,
+		     uint32_t rg_max)
 {
 	struct gfs_holder ji_gh, ri_gh, rgd_gh, t_gh;
 	struct gfs_rgrpd *rgd;
@@ -2022,12 +2026,13 @@ gfs_reclaim_metadata(struct gfs_sbd *sdp,
 	uint32_t goal;
 	unsigned int x;
 	int error = 0;
+	uint64_t rg_count = 0;
 
+	rg_max = rg_max > sdp->sd_rgcount ? sdp->sd_rgcount : rg_max;
 	*inodes = *metadata = 0;
 
 	/* Acquire the jindex lock here so we don't deadlock with a
 	   process writing the the jindex inode. :-( */
-
 	error = gfs_jindex_hold(sdp, &ji_gh);
 	if (error)
 		goto fail;
@@ -2037,8 +2042,11 @@ gfs_reclaim_metadata(struct gfs_sbd *sdp,
 		goto fail_jindex_relse;
 
 	for (rgd = gfs_rgrpd_get_first(sdp);
-	     rgd;
+	     rgd && rg_count < rg_max;
 	     rgd = gfs_rgrpd_get_next(rgd)) {
+		if (!(rgd->rd_flags & RD_FL_META2FREE))
+			continue;
+
 		error = gfs_glock_nq_init(rgd->rd_gl,
 					  LM_ST_EXCLUSIVE, GL_NOCACHE,
 					  &rgd_gh);
@@ -2051,6 +2059,7 @@ gfs_reclaim_metadata(struct gfs_sbd *sdp,
 
 		if (!rg->rg_freedi && !rg->rg_freemeta) {
 			gfs_glock_dq_uninit(&rgd_gh);
+			rgd->rd_flags &= ~RD_FL_META2FREE;
 			continue;
 		}
 
@@ -2123,6 +2132,9 @@ gfs_reclaim_metadata(struct gfs_sbd *sdp,
 
 		gfs_glock_dq_uninit(&t_gh);
 
+		rgd->rd_flags &= ~RD_FL_META2FREE;
+		rg_count++;
+
 		gfs_glock_dq_uninit(&rgd_gh);
 	}
 
diff --git a/gfs-kernel/src/gfs/rgrp.h b/gfs-kernel/src/gfs/rgrp.h
index 0f5c620..c10a9e0 100644
--- a/gfs-kernel/src/gfs/rgrp.h
+++ b/gfs-kernel/src/gfs/rgrp.h
@@ -70,6 +70,7 @@ void gfs_rlist_free(struct gfs_rgrp_list *rlist);
 
 int gfs_reclaim_metadata(struct gfs_sbd *sdp,
 			 uint64_t *inodes,
-			 uint64_t *metadata);
+			 uint64_t *metadata,
+			 uint32_t rg_max);
 
 #endif /* __RGRP_DOT_H__ */
diff --git a/gfs-kernel/src/gfs/super.c b/gfs-kernel/src/gfs/super.c
index 1dea0a0..12f7a25 100644
--- a/gfs-kernel/src/gfs/super.c
+++ b/gfs-kernel/src/gfs/super.c
@@ -75,6 +75,7 @@ gfs_tune_init(struct gfs_tune *gt)
 	gt->gt_greedy_max = HZ / 4;
 	gt->gt_rgrp_try_threshold = 100;
 	gt->gt_statfs_fast = 0;
+	gt->gt_max_rgrp_free_mdata = 5;
 }
 
 /**



More information about the Cluster-cvs mailing list