[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Bug#646063: net: fix route cache rebuilds



Package: linux-source-2.6.32
Version: 2.6.32-38
Severity: critical
Tags: squeeze patch
Justification: breaks the whole system


Hi,

Debian Squeeze running kernel 2.6.32 suffers from the following bug, which was discussed on the kernel mailing list netdev@vger.kernel.org:
http://kerneltrap.org/mailarchive/linux-netdev/2010/3/8/6271476

In detail: [...]
Oct 12 11:54:28 spozerl kernel: [180385.555758] Route hash chain too long!
Oct 12 11:54:28 spozerl kernel: [180385.555760] Adjust your secret_interval!
Oct 12 12:01:52 spozerl kernel: [180829.114321] dst cache overflow
Oct 12 12:01:52 spozerl kernel: [180829.129033] dst cache overflow
Oct 12 12:01:52 spozerl kernel: [180829.130873] dst cache overflow
Oct 12 12:01:52 spozerl kernel: [180829.139006] dst cache overflow
[...] until the kernel network stack freezes after a while.

To resolve these kernel hangs (loss of network connectivity), I applied Eric Dumazet's patch to the linux-source-2.6.32 package; this resolved the issue on my loaded Debian Squeeze router. The patch is available here:
http://patchwork.ozlabs.org/patch/47114/raw/

It would be great if this patch could be included in the official Debian Squeeze kernel. It may also resolve some of the other strange network hangs described by other users.

-Florian

-- System Information:
Debian Release: 6.0.3
  APT prefers stable
  APT policy: (500, 'stable')
Architecture: i386 (i686)

Kernel: Linux 2.6.32-fix-route-cache-rebuilds (SMP w/1 CPU core)
Locale: LANG=de_DE.UTF-8, LC_CTYPE=de_DE.UTF-8 (charmap=UTF-8)
Shell: /bin/sh linked to /bin/bash

Versions of packages linux-source-2.6.32 depends on:
ii  binutils                      2.20.1-16  The GNU assembler, linker and bina
ii  bzip2                         1.0.5-6    high-quality block-sorting file co

Versions of packages linux-source-2.6.32 recommends:
ii  gcc                           4:4.4.5-1  The GNU C compiler
ii  libc6-dev [libc-dev]          2.11.2-10  Embedded GNU C Library: Developmen
ii  make                          3.81-8     An utility for Directing compilati

Versions of packages linux-source-2.6.32 suggests:
ii  kernel-package            12.036+nmu1    A utility for building Linux kerne
ii  libncurses5-dev [ncurses- 5.7+20100313-5 developer's libraries and docs for
pn  libqt3-mt-dev             <none>         (no description available)

-- no debconf information
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index b2ba558..d9b4024 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -146,7 +146,6 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
 static void		 ipv4_link_failure(struct sk_buff *skb);
 static void		 ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
 static int rt_garbage_collect(struct dst_ops *ops);
-static void rt_emergency_hash_rebuild(struct net *net);
 
 
 static struct dst_ops ipv4_dst_ops = {
@@ -780,11 +779,30 @@ static void rt_do_flush(int process_context)
 #define FRACT_BITS 3
 #define ONE (1UL << FRACT_BITS)
 
+/*
+ * Given a hash chain and an item in this hash chain,
+ * find if a previous entry has the same hash_inputs
+ * (but differs on tos, mark or oif)
+ * Returns 0 if an alias is found.
+ * Returns ONE if rth has no alias before itself.
+ */
+static int has_noalias(const struct rtable *head, const struct rtable *rth)
+{
+	const struct rtable *aux = head;
+
+	while (aux != rth) {
+		if (compare_hash_inputs(&aux->fl, &rth->fl))
+			return 0;
+		aux = aux->u.dst.rt_next;
+	}
+	return ONE;
+}
+
 static void rt_check_expire(void)
 {
 	static unsigned int rover;
 	unsigned int i = rover, goal;
-	struct rtable *rth, *aux, **rthp;
+	struct rtable *rth, **rthp;
 	unsigned long samples = 0;
 	unsigned long sum = 0, sum2 = 0;
 	unsigned long delta;
@@ -835,15 +853,7 @@ nofree:
 					 * attributes don't unfairly skew
 					 * the length computation
 					 */
-					for (aux = rt_hash_table[i].chain;;) {
-						if (aux == rth) {
-							length += ONE;
-							break;
-						}
-						if (compare_hash_inputs(&aux->fl, &rth->fl))
-							break;
-						aux = aux->u.dst.rt_next;
-					}
+					length += has_noalias(rt_hash_table[i].chain, rth);
 					continue;
 				}
 			} else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout))
@@ -1073,6 +1083,21 @@ work_done:
 out:	return 0;
 }
 
+/*
+ * Returns number of entries in a hash chain that have different hash_inputs
+ */
+static int slow_chain_length(const struct rtable *head)
+{
+	int length = 0;
+	const struct rtable *rth = head;
+
+	while (rth) {
+		length += has_noalias(head, rth);
+		rth = rth->u.dst.rt_next;
+	}
+	return length >> FRACT_BITS;
+}
+
 static int rt_intern_hash(unsigned hash, struct rtable *rt,
 			  struct rtable **rp, struct sk_buff *skb)
 {
@@ -1185,7 +1210,8 @@ restart:
 			rt_free(cand);
 		}
 	} else {
-		if (chain_length > rt_chain_length_max) {
+		if (chain_length > rt_chain_length_max &&
+		    slow_chain_length(rt_hash_table[hash].chain) > rt_chain_length_max) {
 			struct net *net = dev_net(rt->u.dst.dev);
 			int num = ++net->ipv4.current_rt_cache_rebuild_count;
 			if (!rt_caching(dev_net(rt->u.dst.dev))) {

Reply to: