[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Bug#536860: linux-image-2.6.26-2-sparc64: Kernel unaligned access at TPC[48bf10] __delayacct_add_tsk+0x48/0x15c



Jonathan Nieder wrote:

> Could you test a recent kernel
> from sid or experimental, or try the following patch?
[...]
>     taskstats: use better ifdef for alignment

The following changes would need to be applied first for it to work,
of course.

commit 85893120
Author: Jeff Mahoney <jeffm@suse.com>
Date:   Wed Oct 27 15:34:43 2010 -0700

    delayacct: align to 8 byte boundary on 64-bit systems
    
    prepare_reply() sets up an skb for the response.  The payload contains:
    
     +--------------------------------+
     | genlmsghdr - 4 bytes           |
     +--------------------------------+
     | NLA header - 4 bytes           | /* Aggregate header */
     +-+------------------------------+
     | | NLA header - 4 bytes         | /* PID header */
     | +------------------------------+
     | | pid/tgid   - 4 bytes         |
     | +------------------------------+
     | | NLA header - 4 bytes         | /* stats header */
     | + -----------------------------+ <- oops. aligned on 4 byte boundary
     | | struct taskstats - 328 bytes |
     +-+------------------------------+
    
    The start of the taskstats struct must be 8 byte aligned on IA64 (and
    other systems with 8 byte alignment rules for 64-bit types) or runtime
    alignment warnings will be issued.
    
    This patch pads the pid/tgid field out to sizeof(long), which forces the
    alignment of taskstats.  The getdelays userspace code is ok with this
    since it assumes 32-bit pid/tgid and then honors that header's length
    field.
    
    An array is used to avoid exposing kernel memory contents to userspace in
    the response.
    
    Signed-off-by: Jeff Mahoney <jeffm@suse.com>
    Cc: Balbir Singh <balbir@in.ibm.com>
    Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
    Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 11281d57..5a651aa6 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -360,6 +360,12 @@ static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid)
 	struct nlattr *na, *ret;
 	int aggr;
 
+	/* If we don't pad, we end up with alignment on a 4 byte boundary.
+	 * This causes lots of runtime warnings on systems requiring 8 byte
+	 * alignment */
+	u32 pids[2] = { pid, 0 };
+	int pid_size = ALIGN(sizeof(pid), sizeof(long));
+
 	aggr = (type == TASKSTATS_TYPE_PID)
 			? TASKSTATS_TYPE_AGGR_PID
 			: TASKSTATS_TYPE_AGGR_TGID;
@@ -367,7 +373,7 @@ static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid)
 	na = nla_nest_start(skb, aggr);
 	if (!na)
 		goto err;
-	if (nla_put(skb, type, sizeof(pid), &pid) < 0)
+	if (nla_put(skb, type, pid_size, pids) < 0)
 		goto err;
 	ret = nla_reserve(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats));
 	if (!ret)
commit 4be2c95d
Author: Jeff Mahoney <jeffm@suse.com>
Date:   Tue Dec 21 17:24:30 2010 -0800

    taskstats: pad taskstats netlink response for aligment issues on ia64
    
    The taskstats structure is internally aligned on 8 byte boundaries but the
    layout of the aggregrate reply, with two NLA headers and the pid (each 4
    bytes), actually force the entire structure to be unaligned.  This causes
    the kernel to issue unaligned access warnings on some architectures like
    ia64.  Unfortunately, some software out there doesn't properly unroll the
    NLA packet and assumes that the start of the taskstats structure will
    always be 20 bytes from the start of the netlink payload.  Aligning the
    start of the taskstats structure breaks this software, which we don't
    want.  So, for now the alignment only happens on architectures that
    require it and those users will have to update to fixed versions of those
    packages.  Space is reserved in the packet only when needed.  This ifdef
    should be removed in several years e.g.  2012 once we can be confident
    that fixed versions are installed on most systems.  We add the padding
    before the aggregate since the aggregate is already a defined type.
    
    Commit 85893120 ("delayacct: align to 8 byte boundary on 64-bit systems")
    previously addressed the alignment issues by padding out the pid field.
    This was supposed to be a compatible change but the circumstances
    described above mean that it wasn't.  This patch backs out that change,
    since it was a hack, and introduces a new NULL attribute type to provide
    the padding.  Padding the response with 4 bytes avoids allocating an
    aligned taskstats structure and copying it back.  Since the structure
    weighs in at 328 bytes, it's too big to do it on the stack.
    
    Signed-off-by: Jeff Mahoney <jeffm@suse.com>
    Reported-by: Brian Rogers <brian@xyzw.org>
    Cc: Jeff Mahoney <jeffm@suse.com>
    Cc: Guillaume Chazarain <guichaz@gmail.com>
    Cc: Balbir Singh <balbir@in.ibm.com>
    Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
    Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c
index a2976a6d..e9c77788 100644
--- a/Documentation/accounting/getdelays.c
+++ b/Documentation/accounting/getdelays.c
@@ -516,6 +516,7 @@ int main(int argc, char *argv[])
 			default:
 				fprintf(stderr, "Unknown nla_type %d\n",
 					na->nla_type);
+			case TASKSTATS_TYPE_NULL:
 				break;
 			}
 			na = (struct nlattr *) (GENLMSG_DATA(&msg) + len);
diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h
index 341dddb5..2466e550 100644
--- a/include/linux/taskstats.h
+++ b/include/linux/taskstats.h
@@ -33,7 +33,7 @@
  */
 
 
-#define TASKSTATS_VERSION	7
+#define TASKSTATS_VERSION	8
 #define TS_COMM_LEN		32	/* should be >= TASK_COMM_LEN
 					 * in linux/sched.h */
 
@@ -188,6 +188,7 @@ enum {
 	TASKSTATS_TYPE_STATS,		/* taskstats structure */
 	TASKSTATS_TYPE_AGGR_PID,	/* contains pid + stats */
 	TASKSTATS_TYPE_AGGR_TGID,	/* contains tgid + stats */
+	TASKSTATS_TYPE_NULL,		/* contains nothing */
 	__TASKSTATS_TYPE_MAX,
 };
 
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index c8231fb1..3308fd7f 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -349,25 +349,47 @@ static int parse(struct nlattr *na, struct cpumask *mask)
 	return ret;
 }
 
+#ifdef CONFIG_IA64
+#define TASKSTATS_NEEDS_PADDING 1
+#endif
+
 static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid)
 {
 	struct nlattr *na, *ret;
 	int aggr;
 
-	/* If we don't pad, we end up with alignment on a 4 byte boundary.
-	 * This causes lots of runtime warnings on systems requiring 8 byte
-	 * alignment */
-	u32 pids[2] = { pid, 0 };
-	int pid_size = ALIGN(sizeof(pid), sizeof(long));
-
 	aggr = (type == TASKSTATS_TYPE_PID)
 			? TASKSTATS_TYPE_AGGR_PID
 			: TASKSTATS_TYPE_AGGR_TGID;
 
+	/*
+	 * The taskstats structure is internally aligned on 8 byte
+	 * boundaries but the layout of the aggregrate reply, with
+	 * two NLA headers and the pid (each 4 bytes), actually
+	 * force the entire structure to be unaligned. This causes
+	 * the kernel to issue unaligned access warnings on some
+	 * architectures like ia64. Unfortunately, some software out there
+	 * doesn't properly unroll the NLA packet and assumes that the start
+	 * of the taskstats structure will always be 20 bytes from the start
+	 * of the netlink payload. Aligning the start of the taskstats
+	 * structure breaks this software, which we don't want. So, for now
+	 * the alignment only happens on architectures that require it
+	 * and those users will have to update to fixed versions of those
+	 * packages. Space is reserved in the packet only when needed.
+	 * This ifdef should be removed in several years e.g. 2012 once
+	 * we can be confident that fixed versions are installed on most
+	 * systems. We add the padding before the aggregate since the
+	 * aggregate is already a defined type.
+	 */
+#ifdef TASKSTATS_NEEDS_PADDING
+	if (nla_put(skb, TASKSTATS_TYPE_NULL, 0, NULL) < 0)
+		goto err;
+#endif
 	na = nla_nest_start(skb, aggr);
 	if (!na)
 		goto err;
-	if (nla_put(skb, type, pid_size, pids) < 0)
+
+	if (nla_put(skb, type, sizeof(pid), &pid) < 0)
 		goto err;
 	ret = nla_reserve(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats));
 	if (!ret)
@@ -456,6 +478,18 @@ out:
 	return rc;
 }
 
+static size_t taskstats_packet_size(void)
+{
+	size_t size;
+
+	size = nla_total_size(sizeof(u32)) +
+		nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
+#ifdef TASKSTATS_NEEDS_PADDING
+	size += nla_total_size(0); /* Padding for alignment */
+#endif
+	return size;
+}
+
 static int cmd_attr_pid(struct genl_info *info)
 {
 	struct taskstats *stats;
@@ -464,8 +498,7 @@ static int cmd_attr_pid(struct genl_info *info)
 	u32 pid;
 	int rc;
 
-	size = nla_total_size(sizeof(u32)) +
-		nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
+	size = taskstats_packet_size();
 
 	rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size);
 	if (rc < 0)
@@ -494,8 +527,7 @@ static int cmd_attr_tgid(struct genl_info *info)
 	u32 tgid;
 	int rc;
 
-	size = nla_total_size(sizeof(u32)) +
-		nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
+	size = taskstats_packet_size();
 
 	rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size);
 	if (rc < 0)
@@ -570,8 +602,7 @@ void taskstats_exit(struct task_struct *tsk, int group_dead)
 	/*
 	 * Size includes space for nested attributes
 	 */
-	size = nla_total_size(sizeof(u32)) +
-		nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
+	size = taskstats_packet_size();
 
 	is_thread_group = !!taskstats_tgid_alloc(tsk);
 	if (is_thread_group) {



Reply to: