
Re: CAN-2005-1767: Unspecified DoS through stack fault exceptions / SuSE specific?



On Sun, Aug 14, 2005 at 01:54:44PM +0200, Moritz Muehlenhoff wrote:
> Hi,
> I didn't file a bug for this as it lacks too much information:
> A week ago SuSE released an advisory for their kernels:
> http://www.novell.com/linux/security/advisories/2005_44_kernel.html
> 
> Among others they fixed this:
> - local users could crash the system by causing stack fault
>   exceptions (CAN-2005-1767)
> 
>   SUSE Linux 9.0 and SLES8 are affected.
> 
> Is this SuSE specific? I couldn't find any more information about it.

I heard on the grapevine that it's a problem with TSS on amd64.
The attached patches should resolve the problem. I'll get them into SVN
ASAP.

2.6.8
arch-x86_64-nmi.dpatch: needed by arch-x86_64-nmi.dpatch
arch-x86_64-nmi.dpatch: needed by arch-x86_64-private-tss.dpatch
arch-x86_64-private-tss.dpatch: the fix itself

2.4.27
arch-x86_64-kernel-stack-faults.diff

2.6.12 
is not vulnerable

-- 
Horms
commit 0a65800243742480b4b594b619b759749a3cfef4
tree 72f9a3b376c604e7619ef265c7dc351644f45359
parent 635186447d0e6f3b35895fda993a266a1315d2a7
author Andi Kleen <ak@suse.de> 1113690317 -0700
committer Linus Torvalds <torvalds@ppc970.osdl.org> 1113690317 -0700

[PATCH] x86_64: Rewrite exception stack backtracing

Exceptions and hardware interrupts can, to a certain degree, nest, so when
attempting to follow the sequence of stacks used in order to dump their
contents this has to be accounted for.  Also, IST stacks have their tops
stored in the TSS, so there's no need to add the stack size to get to their
ends.

Minor changes from AK.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

I:100644 100644 f6ccf155e3e5851a9819e6014400f093b0803246 65a37f52c56ef2c0760f2e3db9dfec9312a74d88 M	arch/x86_64/kernel/traps.c

Key:
S: Skipped
I: Included Included verbatim
D: Deleted  Manually deleted by subsequent user edit
R: Revised  Manually revised by subsequent user edit
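
A condensed view of the rewrite may help when reviewing the backport: the
new in_exception_stack() locates an IST stack by its top, which the TSS
already stores, and tracks which stacks it has visited so that nested
exceptions cannot send the walker in circles. Roughly (a standalone sketch
with simplified types and a made-up ist_top[] array, not the patch itself):

#define N_EXCEPTION_STACKS 5
#define EXCEPTION_STKSZ    4096

/* stands in for per_cpu(init_tss, cpu).ist[] in the real code */
static unsigned long ist_top[N_EXCEPTION_STACKS];

static unsigned long *find_exception_stack(unsigned long sp, unsigned *usedp)
{
	unsigned k;

	for (k = 0; k < N_EXCEPTION_STACKS; k++) {
		unsigned long end = ist_top[k];

		/* on stack k iff sp lies in [end - EXCEPTION_STKSZ, end) */
		if (sp >= end || sp < end - EXCEPTION_STKSZ)
			continue;
		if (*usedp & (1U << k))
			return NULL;		/* already walked: nested re-entry, stop */
		*usedp |= 1U << k;
		return (unsigned long *)end;	/* caller walks from sp up to this end */
	}
	return NULL;
}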

diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -120,95 +120,106 @@ int printk_address(unsigned long address
 } 
 #endif
 
-unsigned long *in_exception_stack(int cpu, unsigned long stack) 
-{ 
-	int k;
+static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
+					unsigned *usedp, const char **idp)
+{
+	static const char ids[N_EXCEPTION_STACKS][8] = {
+		[DEBUG_STACK - 1] = "#DB",
+		[NMI_STACK - 1] = "NMI",
+		[DOUBLEFAULT_STACK - 1] = "#DF",
+		[STACKFAULT_STACK - 1] = "#SS",
+		[MCE_STACK - 1] = "#MC",
+	};
+	unsigned k;
+
 	for (k = 0; k < N_EXCEPTION_STACKS; k++) {
-		struct tss_struct *tss = &per_cpu(init_tss, cpu);
-		unsigned long start = tss->ist[k] - EXCEPTION_STKSZ;
+		unsigned long end;
 
-		if (stack >= start && stack < tss->ist[k])
-			return (unsigned long *)tss->ist[k];
+		end = per_cpu(init_tss, cpu).ist[k];
+		if (stack >= end)
+			continue;
+		if (stack >= end - EXCEPTION_STKSZ) {
+			if (*usedp & (1U << k))
+				break;
+			*usedp |= 1U << k;
+			*idp = ids[k];
+			return (unsigned long *)end;
+		}
 	}
 	return NULL;
-} 
+}
 
 /*
  * x86-64 can have upto three kernel stacks: 
  * process stack
  * interrupt stack
- * severe exception (double fault, nmi, stack fault) hardware stack
- * Check and process them in order.
+ * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
  */
 
 void show_trace(unsigned long *stack)
 {
 	unsigned long addr;
-	unsigned long *irqstack, *irqstack_end, *estack_end;
-	const int cpu = safe_smp_processor_id();
+	const unsigned cpu = safe_smp_processor_id();
+	unsigned long *irqstack_end = (unsigned long *)cpu_pda[cpu].irqstackptr;
 	int i;
+	unsigned used = 0;
 
 	printk("\nCall Trace:");
-	i = 0; 
-	
-	estack_end = in_exception_stack(cpu, (unsigned long)stack); 
-	if (estack_end) { 
-		while (stack < estack_end) { 
-			addr = *stack++; 
-			if (__kernel_text_address(addr)) {
-				i += printk_address(addr);
-				i += printk(" "); 
-				if (i > 50) {
-					printk("\n"); 
-					i = 0;
-				}
-			}
+
+#define HANDLE_STACK(cond) \
+	do while (cond) { \
+		addr = *stack++; \
+		if (kernel_text_address(addr)) { \
+			/* \
+			 * If the address is either in the text segment of the \
+			 * kernel, or in the region which contains vmalloc'ed \
+			 * memory, it *may* be the address of a calling \
+			 * routine; if so, print it so that someone tracing \
+			 * down the cause of the crash will be able to figure \
+			 * out the call path that was taken. \
+			 */ \
+			i += printk_address(addr); \
+			if (i > 50) { \
+				printk("\n       "); \
+				i = 0; \
+			} \
+			else \
+				i += printk(" "); \
+		} \
+	} while (0)
+
+	for(i = 0; ; ) {
+		const char *id;
+		unsigned long *estack_end;
+		estack_end = in_exception_stack(cpu, (unsigned long)stack,
+						&used, &id);
+
+		if (estack_end) {
+			i += printk(" <%s> ", id);
+			HANDLE_STACK (stack < estack_end);
+			i += printk(" <EOE> ");
+			stack = (unsigned long *) estack_end[-2];
+			continue;
 		}
-		i += printk(" <EOE> "); 
-		i += 7;
-		stack = (unsigned long *) estack_end[-2]; 
-	}  
-
-	irqstack_end = (unsigned long *) (cpu_pda[cpu].irqstackptr);
-	irqstack = (unsigned long *) (cpu_pda[cpu].irqstackptr - IRQSTACKSIZE + 64);
-
-	if (stack >= irqstack && stack < irqstack_end) {
-		printk("<IRQ> ");  
-		while (stack < irqstack_end) {
-			addr = *stack++;
-			/*
-			 * If the address is either in the text segment of the
-			 * kernel, or in the region which contains vmalloc'ed
-			 * memory, it *may* be the address of a calling
-			 * routine; if so, print it so that someone tracing
-			 * down the cause of the crash will be able to figure
-			 * out the call path that was taken.
-			 */
-			 if (__kernel_text_address(addr)) {
-				 i += printk_address(addr);
-				 i += printk(" "); 
-				 if (i > 50) { 
-					printk("\n       ");
-					 i = 0;
-				 } 
+		if (irqstack_end) {
+			unsigned long *irqstack;
+			irqstack = irqstack_end -
+				(IRQSTACKSIZE - 64) / sizeof(*irqstack);
+
+			if (stack >= irqstack && stack < irqstack_end) {
+				i += printk(" <IRQ> ");
+				HANDLE_STACK (stack < irqstack_end);
+				stack = (unsigned long *) (irqstack_end[-1]);
+				irqstack_end = NULL;
+				i += printk(" <EOI> ");
+				continue;
 			}
-		} 
-		stack = (unsigned long *) (irqstack_end[-1]);
-		printk(" <EOI> ");
-		i += 7;
-	} 
-
-	while (((long) stack & (THREAD_SIZE-1)) != 0) {
-		addr = *stack++;
-		if (__kernel_text_address(addr)) {
-			i += printk_address(addr);
-			i += printk(" "); 
-			if (i > 50) { 
-				printk("\n       ");
-					 i = 0;
-			} 
 		}
+		break;
 	}
+
+	HANDLE_STACK (((long) stack & (THREAD_SIZE-1)) != 0);
+#undef HANDLE_STACK
 	printk("\n");
 }
 
From: torvalds <torvalds>
Date: Tue, 29 Mar 2005 04:43:20 +0000 (+0000)
Subject: Merge whitespace and __nocast changes
X-Git-Tag: v2.6.12-rc2
X-Git-Url: http://www.kernel.org/git/?p=linux/kernel/git/torvalds/old-2.6-bkcvs.git;a=commitdiff;h=b035f9332ce7e205af43f7cfdf4e1cf3625f7ad5

--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -124,10 +124,10 @@ unsigned long *in_exception_stack(int cp
 	int k;
 	for (k = 0; k < N_EXCEPTION_STACKS; k++) {
 		struct tss_struct *tss = &per_cpu(init_tss, cpu);
-		unsigned long end = tss->ist[k] + EXCEPTION_STKSZ;
+		unsigned long start = tss->ist[k] - EXCEPTION_STKSZ;
 
-		if (stack >= tss->ist[k]  && stack <= end)
-			return (unsigned long *)end;
+		if (stack >= start && stack < tss->ist[k])
+			return (unsigned long *)tss->ist[k];
 	}
 	return NULL;
 } 
@@ -348,7 +348,6 @@ void oops_end(void)
 	die_owner = -1;
 	bust_spinlocks(0); 
 	spin_unlock(&die_lock); 
-	local_irq_enable();	/* make sure back scroll still works */
 	if (panic_on_oops)
 		panic("Oops"); 
 } 
@@ -617,15 +616,6 @@ asmlinkage void default_do_nmi(struct pt
 		mem_parity_error(reason, regs);
 	if (reason & 0x40)
 		io_check_error(reason, regs);
-
-	/*
-	 * Reassert NMI in case it became active meanwhile
-	 * as it's edge-triggered.
-	 */
-	outb(0x8f, 0x70);
-	inb(0x71);		/* dummy */
-	outb(0x0f, 0x70);
-	inb(0x71);		/* dummy */
 }
 
 asmlinkage void do_int3(struct pt_regs * regs, long error_code)
From: ak <ak>
Date: Fri, 17 Sep 2004 19:02:51 +0000 (+0000)
Subject: [PATCH] x86-64: turn tss into per cpu data
X-Git-Tag: v2.6.9-rc3
X-Git-Url: http://www.kernel.org/git/?p=linux/kernel/git/torvalds/old-2.6-bkcvs.git;a=commitdiff;h=cd90df2df1d046ca81de40b1775948bac2589ff3

  [PATCH] x86-64: turn tss into per cpu data
  
  Turn per cpu TSS into per cpu data.
  
  Signed-off-by: Andi Kleen <ak@muc.de>
  Signed-off-by: Andrew Morton <akpm@osdl.org>
  Signed-off-by: Linus Torvalds <torvalds@osdl.org>
  
  BKrev: 414b34dbBQ6oHL0JUCgQuOzKTeLq9g
---
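
The mechanical part of this patch is the standard array-to-per-cpu
conversion. On a toy struct (illustration only; the hunks below apply the
same pattern to init_tss and struct tss_struct):

#include <linux/percpu.h>

struct foo {					/* toy stand-in for struct tss_struct */
	unsigned long rsp0;
};

/* before: struct foo foo_table[NR_CPUS];  accessed as foo_table + cpu   */
/* after:  one per-CPU instance, accessed through the per_cpu() macro    */
DEFINE_PER_CPU(struct foo, foo_table);

static struct foo *get_foo(int cpu)
{
	return &per_cpu(foo_table, cpu);
}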

--- a/arch/x86_64/kernel/init_task.c
+++ b/arch/x86_64/kernel/init_task.c
@@ -44,8 +44,7 @@ EXPORT_SYMBOL(init_task);
  * section. Since TSS's are completely CPU-local, we want them
  * on exact cacheline boundaries, to eliminate cacheline ping-pong.
  */ 
-struct tss_struct init_tss[NR_CPUS] __cacheline_aligned;
-
+DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_maxaligned_in_smp;
 
 #define ALIGN_TO_4K __attribute__((section(".data.init_task")))
 
--- a/arch/x86_64/kernel/ioport.c
+++ b/arch/x86_64/kernel/ioport.c
@@ -61,7 +61,7 @@ asmlinkage long sys_ioperm(unsigned long
 	 * do it in the per-thread copy and in the TSS ...
 	 */
 	set_bitmap(t->io_bitmap_ptr, from, num, !turn_on);
-	tss = init_tss + get_cpu();
+	tss = &per_cpu(init_tss, get_cpu());
 	if (tss->io_bitmap_base == IO_BITMAP_OFFSET) { /* already active? */
 		set_bitmap(tss->io_bitmap, from, num, !turn_on);
 	} else {
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -251,7 +251,7 @@ void exit_thread(void)
 {
 	struct task_struct *me = current;
 	if (me->thread.io_bitmap_ptr) { 
-		struct tss_struct *tss = init_tss + get_cpu();
+		struct tss_struct *tss = &per_cpu(init_tss,get_cpu());
 		kfree(me->thread.io_bitmap_ptr); 
 		me->thread.io_bitmap_ptr = NULL;
 		tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
@@ -404,7 +404,7 @@ struct task_struct *__switch_to(struct t
 	struct thread_struct *prev = &prev_p->thread,
 				 *next = &next_p->thread;
 	int cpu = smp_processor_id();  
-	struct tss_struct *tss = init_tss + cpu;
+	struct tss_struct *tss = &per_cpu(init_tss, cpu);
 
 	unlazy_fpu(prev_p);
 
--- a/arch/x86_64/kernel/setup64.c
+++ b/arch/x86_64/kernel/setup64.c
@@ -237,7 +237,7 @@ void __init cpu_init (void)
 #else
 	int cpu = smp_processor_id();
 #endif
-	struct tss_struct * t = &init_tss[cpu];
+	struct tss_struct *t = &per_cpu(init_tss, cpu);
 	unsigned long v; 
 	char *estacks = NULL; 
 	struct task_struct *me;
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -575,7 +575,7 @@ static void __init do_boot_cpu (int apic
 	start_rip = setup_trampoline();
 
 	init_rsp = idle->thread.rsp; 
-	init_tss[cpu].rsp0 = init_rsp;
+	per_cpu(init_tss,cpu).rsp0 = init_rsp;
 	initial_code = start_secondary;
 	clear_ti_thread_flag(idle->thread_info, TIF_FORK);
 
--- a/arch/x86_64/kernel/suspend.c
+++ b/arch/x86_64/kernel/suspend.c
@@ -129,7 +129,7 @@ void restore_processor_state(void)
 void fix_processor_context(void)
 {
 	int cpu = smp_processor_id();
-	struct tss_struct * t = init_tss + cpu;
+	struct tss_struct *t = &per_cpu(init_tss, cpu);
 
 	set_tss_desc(cpu,t);	/* This just modifies memory; should not be neccessary. But... This is neccessary, because 386 hardware has concept of busy TSS or some similar stupidity. */
 
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -110,9 +110,10 @@ unsigned long *in_exception_stack(int cp
 { 
 	int k;
 	for (k = 0; k < N_EXCEPTION_STACKS; k++) {
-		unsigned long end = init_tss[cpu].ist[k] + EXCEPTION_STKSZ; 
+		struct tss_struct *tss = &per_cpu(init_tss, cpu);
+		unsigned long end = tss->ist[k] + EXCEPTION_STKSZ;
 
-		if (stack >= init_tss[cpu].ist[k]  && stack <= end) 
+		if (stack >= tss->ist[k]  && stack <= end)
 			return (unsigned long *)end;
 	}
 	return NULL;
--- a/include/asm-x86_64/processor.h
+++ b/include/asm-x86_64/processor.h
@@ -18,6 +18,7 @@
 #include <asm/current.h>
 #include <asm/system.h>
 #include <asm/mmsegment.h>
+#include <asm/percpu.h>
 #include <linux/personality.h>
 
 #define TF_MASK		0x00000100
@@ -77,9 +78,6 @@ struct cpuinfo_x86 {
 #define X86_VENDOR_NUM 8
 #define X86_VENDOR_UNKNOWN 0xff
 
-extern struct cpuinfo_x86 boot_cpu_data;
-extern struct tss_struct init_tss[NR_CPUS];
-
 #ifdef CONFIG_SMP
 extern struct cpuinfo_x86 cpu_data[];
 #define current_cpu_data cpu_data[smp_processor_id()]
@@ -229,6 +227,9 @@ struct tss_struct {
 	unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
 } __attribute__((packed)) ____cacheline_aligned;
 
+extern struct cpuinfo_x86 boot_cpu_data;
+DECLARE_PER_CPU(struct tss_struct,init_tss);
+
 #define ARCH_MIN_TASKALIGN	16
 
 struct thread_struct {
commit 51e31546a2fc46cb978da2ee0330a6a68f07541e
tree d0ce9dbf6615dc17e2423d5967a940a2f667abb6
parent 3a36ef7ace64c507f6b087071429dbfed7d2a96d
author Andi Kleen <ak@suse.de> 1120139192 +0200
committer Marcelo Tosatti <marcelo.tosatti@cyclades.com> 1120172534 -0300

[PATCH] x86_64: Disable exception stack for stack faults

Stack segment faults were executed on an exception stack. But they
use the normal return path and can schedule there, and scheduling
is not allowed on an exception stack.

Just drop the exception stack for stack segment faults. This
will make some oopses triple fault now, but that's better than
allowing user-triggerable oopses.

Double faults still have this problem, but if they happen you
have enough other problems already that this one doesn't matter
anymore.

2.6 has a more complicated fix here that actually handles
this properly, but for 2.4 the simple version is better.

Found by Red Hat QA using crashme

Signed-off-by: Andi Kleen <ak@suse.de>

I:100644 100644 e3fdfe1d4efc3a5dfa69d6a8ff37f65982a11955 16c04186194011a0806251d6c8b1ff2227411a8c M	arch/x86_64/kernel/traps.c
I:100644 100644 3e7cac6e1b5f79c4e5a555d7b5525a9090e86b9a 45342a926f6f88b8bd273b634a269c684ceac873 M	include/asm-x86_64/processor.h

Key:
S: Skipped
I: Included Included verbatim
D: Deleted  Manually deleted by subsequent user edit
R: Revised  Manually revised by subsequent user edit
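
For context, the reason removing the IST entry is enough: the 3-bit IST
field in a 64-bit IDT gate, when non-zero, makes the CPU unconditionally
switch RSP to tss->ist[ist - 1] on entry, i.e. onto a small per-CPU stack
that must be left before anything may sleep; with ist == 0 the handler runs
on the current task's normal kernel stack (via the usual rsp0 switch on a
privilege change), which is safe to schedule on. Hence set_intr_gate(12, ...)
instead of set_intr_gate_ist(12, ..., STACKFAULT_STACK). Sketch of the
descriptor layout (descriptive field names, not the kernel's own struct):

struct gate64 {
	unsigned short offset_low;	/* handler address bits  0..15         */
	unsigned short selector;	/* kernel code segment selector        */
	unsigned char  ist;		/* bits 0..2: IST index, 0 = no switch */
	unsigned char  type_attr;	/* gate type, DPL, present bit         */
	unsigned short offset_mid;	/* handler address bits 16..31         */
	unsigned int   offset_high;	/* handler address bits 32..63         */
	unsigned int   reserved;
} __attribute__((packed));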

diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -857,7 +857,7 @@ void __init trap_init(void)
 	set_intr_gate(9,&coprocessor_segment_overrun);
 	set_intr_gate(10,&invalid_TSS);
 	set_intr_gate(11,&segment_not_present);
-	set_intr_gate_ist(12,&stack_segment,STACKFAULT_STACK);
+	set_intr_gate(12,&stack_segment);
 	set_intr_gate(13,&general_protection);
 	set_intr_gate(14,&page_fault);
 	set_intr_gate(15,&spurious_interrupt_bug);
diff --git a/include/asm-x86_64/processor.h b/include/asm-x86_64/processor.h
--- a/include/asm-x86_64/processor.h
+++ b/include/asm-x86_64/processor.h
@@ -325,10 +325,9 @@ struct thread_struct {
 #define INIT_MMAP \
 { &init_mm, 0, 0, NULL, PAGE_SHARED, VM_READ | VM_WRITE | VM_EXEC, 1, NULL, NULL }
 
-#define STACKFAULT_STACK 1
-#define DOUBLEFAULT_STACK 2 
-#define NMI_STACK 3 
-#define N_EXCEPTION_STACKS 3  /* hw limit: 7 */
+#define DOUBLEFAULT_STACK 1
+#define NMI_STACK 2
+#define N_EXCEPTION_STACKS 2  /* hw limit: 7 */
 #define EXCEPTION_STKSZ PAGE_SIZE
 #define EXCEPTION_STK_ORDER 0
 
