[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Bug#607879: System hangs up with mmap.c:873!



On Sun, 2011-01-23 at 09:18 +0100, Ronny Standtke wrote:
> > See
> > <http://kernel-handbook.alioth.debian.org/ch-common-tasks.html#s-common-off
> > icial> - in particular, section 4.2.5, Simplified patching and building.
> 
> OK, here is what I tried now:
> -------------
> apt-get source linux-image-2.6.32-5-686
> cd linux-2.6-2.6.32
> bash debian/bin/test-patches ../000*
> -------------
> 
> Unfortunately, this fails with the following error message:
> ...
> --> 30 fully applied.
> --> Try to apply 30a~test.
>   (+) OK   test/0001-Revert-aufs-narrow-down-the-BKL-region.patch
>   (+) OK   test/0002-Revert-aufs-bugfix-unlock-mmap_sem-temporary-using-
> B.patch
> patch: **** malformed patch at line 182: @@ -630,25 +698,22 @@ static int 
> aufs_mmap(struct file *file, struct vm_area_struct *vma)
> 
>   (+) FAIL test/0003-aufs-bugfix-another-approach-to-keep-the-lock-
[...]

Not sure how I did that, but try this version of patch 3 instead.

Ben.

-- 
Ben Hutchings
Once a job is fouled up, anything done to improve it makes it worse.
From: J. R. Okajima <hooanon05@yahoo.co.jp>
Date: Mon, 8 Mar 2010 23:45:56 +0900
Subject: [PATCH 3/4] aufs: bugfix, another approach to keep the lock order of mmap_sem

commit d986fa5a8557f6861fcac4106b6d75301bf5d118 in aufs2-2.6

The previous approach
	4b70e6f aufs: bugfix, unlock mmap_sem temporary using BKL
was bad and already reverted.
This approach is ugly too, but it works.

- split aufs_mmap() into two parts.
- the first part is for copy-ing up which requires rwsem and executed by
  aufsd workqueue.
- the second part is generic_file_mmap() and customizing vm_ops, and
  executed by the original context.
- to protect customizing vm_ops from race between two mmaps, introduce a
  new mutex in au_finfo. lock in the first phase, and release it in the
  second. this is the most ugly part of this approach. if we could use
  fi_rwsem for this use, we would use it. but there is no 'set_owner'
  method for rwsem, but mutex has.

Signed-off-by: J. R. Okajima <hooanon05@yahoo.co.jp>
[bwh: Adjust for 2.6.32]
---
 fs/aufs/f_op.c  |  113 +++++++++++++++++++++++++++++++++++++++++++------------
 fs/aufs/file.h  |    3 +
 fs/aufs/finfo.c |   17 ++++++++
 3 files changed, 109 insertions(+), 24 deletions(-)

diff --git a/fs/aufs/f_op.c b/fs/aufs/f_op.c
index 6f89992..32cc36f 100644
--- a/fs/aufs/f_op.c
+++ b/fs/aufs/f_op.c
@@ -77,6 +77,7 @@ int au_do_open_nondir(struct file *file, int flags)
 	finfo = au_fi(file);
 	finfo->fi_h_vm_ops = NULL;
 	finfo->fi_vm_ops = NULL;
+	mutex_init(&finfo->fi_mmap); /* regular file only? */
 	bindex = au_dbstart(dentry);
 	/* O_TRUNC is processed already */
 	BUG_ON(au_test_ro(dentry->d_sb, bindex, dentry->d_inode)
@@ -544,7 +545,7 @@ static int au_custom_vm_ops(struct au_finfo *finfo, struct vm_area_struct *vma)
 	int err;
 	struct vm_operations_struct *h_ops;
 
-	AuRwMustAnyLock(&finfo->fi_rwsem);
+	MtxMustLock(&finfo->fi_mmap);
 
 	err = 0;
 	h_ops = finfo->fi_h_vm_ops;
@@ -570,49 +571,115 @@ static int au_custom_vm_ops(struct au_finfo *finfo, struct vm_area_struct *vma)
 	return err;
 }
 
-static int aufs_mmap(struct file *file, struct vm_area_struct *vma)
+/*
+ * This is another ugly approach to keep the lock order, particularly
+ * mm->mmap_sem and aufs rwsem. The previous approach was reverted and you can
+ * find it in git-log, if you want.
+ *
+ * native readdir: i_mutex, copy_to_user, mmap_sem
+ * aufs readdir: i_mutex, rwsem, nested-i_mutex, copy_to_user, mmap_sem
+ *
+ * Before aufs_mmap() mmap_sem is acquired already, but aufs_mmap() has to
+ * acquire aufs rwsem. It introduces a circular locking dependency.
+ * To address this problem, aufs_mmap() delegates the part which requires aufs
+ * rwsem to its internal workqueue.
+ */
+
+/* very ugly approach */
+#ifdef CONFIG_DEBUG_MUTEXES
+#include <../kernel/mutex-debug.h>
+#else
+#include <../kernel/mutex.h>
+#endif
+
+struct au_mmap_pre_args {
+	/* input */
+	struct file *file;
+	struct vm_area_struct *vma;
+
+	/* output */
+	int *errp;
+	struct file *h_file;
+	int mmapped;
+};
+
+static int au_mmap_pre(struct file *file, struct vm_area_struct *vma,
+		       struct file **h_file, int *mmapped)
 {
 	int err;
-	unsigned char wlock, mmapped;
+	const unsigned char wlock
+		= !!(file->f_mode & FMODE_WRITE) && (vma->vm_flags & VM_SHARED);
 	struct dentry *dentry;
 	struct super_block *sb;
-	struct file *h_file;
-	struct vm_operations_struct *vm_ops;
 
 	dentry = file->f_dentry;
-	wlock = !!(file->f_mode & FMODE_WRITE) && (vma->vm_flags & VM_SHARED);
 	sb = dentry->d_sb;
-	si_read_lock(sb, AuLock_FLUSH);
+	si_read_lock(sb, !AuLock_FLUSH);
 	err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
 	if (unlikely(err))
 		goto out;
 
-	mmapped = !!au_test_mmapped(file);
+	*mmapped = !!au_test_mmapped(file);
 	if (wlock) {
 		struct au_pin pin;
 
 		err = au_ready_to_write(file, -1, &pin);
-		di_downgrade_lock(dentry, AuLock_IR);
+		di_write_unlock(dentry);
 		if (unlikely(err))
 			goto out_unlock;
 		au_unpin(&pin);
 	} else
-		di_downgrade_lock(dentry, AuLock_IR);
+		di_write_unlock(dentry);
+	*h_file = au_h_fptr(file, au_fbstart(file));
+	get_file(*h_file);
+	au_fi_mmap_lock(file);
 
-	h_file = au_h_fptr(file, au_fbstart(file));
-	if (!mmapped && au_test_fs_bad_mapping(h_file->f_dentry->d_sb)) {
+out_unlock:
+	fi_write_unlock(file);
+out:
+	si_read_unlock(sb);
+	return err;
+}
+
+static void au_call_mmap_pre(void *args)
+{
+	struct au_mmap_pre_args *a = args;
+	*a->errp = au_mmap_pre(a->file, a->vma, &a->h_file, &a->mmapped);
+}
+
+static int aufs_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	int err, wkq_err;
+	struct au_finfo *finfo;
+	struct dentry *h_dentry;
+	struct vm_operations_struct *vm_ops;
+	struct au_mmap_pre_args args = {
+		.file		= file,
+		.vma		= vma,
+		.errp		= &err
+	};
+
+	wkq_err = au_wkq_wait(au_call_mmap_pre, &args);
+	if (unlikely(wkq_err))
+		err = wkq_err;
+	if (unlikely(err))
+		goto out;
+	finfo = au_fi(file);
+
+	h_dentry = args.h_file->f_dentry;
+	if (!args.mmapped && au_test_fs_bad_mapping(h_dentry->d_sb)) {
 		/*
 		 * by this assignment, f_mapping will differs from aufs inode
 		 * i_mapping.
 		 * if someone else mixes the use of f_dentry->d_inode and
 		 * f_mapping->host, then a problem may arise.
 		 */
-		file->f_mapping = h_file->f_mapping;
+		file->f_mapping = args.h_file->f_mapping;
 	}
 
 	vm_ops = NULL;
-	if (!mmapped) {
-		vm_ops = au_vm_ops(h_file, vma);
+	if (!args.mmapped) {
+		vm_ops = au_vm_ops(args.h_file, vma);
 		err = PTR_ERR(vm_ops);
 		if (IS_ERR(vm_ops))
 			goto out_unlock;
@@ -630,25 +698,22 @@ static int aufs_mmap(struct file *file, struct vm_area_struct *vma)
 		goto out_unlock;
 
 	vma->vm_ops = &aufs_vm_ops;
-	if (!mmapped) {
-		struct au_finfo *finfo = au_fi(file);
-
+	if (!args.mmapped) {
 		finfo->fi_h_vm_ops = vm_ops;
 		mutex_init(&finfo->fi_vm_mtx);
 	}
 
-	err = au_custom_vm_ops(au_fi(file), vma);
+	err = au_custom_vm_ops(finfo, vma);
 	if (unlikely(err))
 		goto out_unlock;
 
-	vfsub_file_accessed(h_file);
-	fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode);
+	vfsub_file_accessed(args.h_file);
+	fsstack_copy_attr_atime(file->f_dentry->d_inode, h_dentry->d_inode);
 
  out_unlock:
-	di_read_unlock(dentry, AuLock_IR);
-	fi_write_unlock(file);
+	au_fi_mmap_unlock(file);
+	fput(args.h_file);
  out:
-	si_read_unlock(sb);
 	return err;
 }
 
diff --git a/fs/aufs/file.h b/fs/aufs/file.h
index 4aaea9e..1d1d9d6 100644
--- a/fs/aufs/file.h
+++ b/fs/aufs/file.h
@@ -49,6 +49,7 @@ struct au_finfo {
 			struct vm_operations_struct	*fi_h_vm_ops;
 			struct vm_operations_struct	*fi_vm_ops;
 			struct mutex			fi_vm_mtx;
+			struct mutex			fi_mmap;
 		};
 
 		/* dir only */
@@ -114,6 +115,8 @@ void au_set_h_fptr(struct file *file, aufs_bindex_t bindex,
 		   struct file *h_file);
 
 void au_update_figen(struct file *file);
+void au_fi_mmap_lock(struct file *file);
+void au_fi_mmap_unlock(struct file *file);
 
 void au_finfo_fin(struct file *file);
 int au_finfo_init(struct file *file);
diff --git a/fs/aufs/finfo.c b/fs/aufs/finfo.c
index 4ab55e4..24ed4a1 100644
--- a/fs/aufs/finfo.c
+++ b/fs/aufs/finfo.c
@@ -56,6 +56,23 @@ void au_update_figen(struct file *file)
 
 /* ---------------------------------------------------------------------- */
 
+void au_fi_mmap_lock(struct file *file)
+{
+	FiMustWriteLock(file);
+	lockdep_off();
+	mutex_lock(&au_fi(file)->fi_mmap);
+	lockdep_on();
+}
+
+void au_fi_mmap_unlock(struct file *file)
+{
+	lockdep_off();
+	mutex_unlock(&au_fi(file)->fi_mmap);
+	lockdep_on();
+}
+
+/* ---------------------------------------------------------------------- */
+
 void au_finfo_fin(struct file *file)
 {
 	struct au_finfo *finfo;
-- 
1.7.2.3

Attachment: signature.asc
Description: This is a digitally signed message part


Reply to: