Bug#646063: net: fix route cache rebuilds
Package: linux-source-2.6.32
Version: 2.6.32-38
Severity: critical
Tags: squeeze patch
Justification: breaks the whole system
Hi,
Debian Squeeze running kernel 2.6.32 suffers from the following bug, which was discussed on the kernel mailing list netdev@vger.kernel.org:
http://kerneltrap.org/mailarchive/linux-netdev/2010/3/8/6271476
In detail: [...]
Oct 12 11:54:28 spozerl kernel: [180385.555758] Route hash chain too long!
Oct 12 11:54:28 spozerl kernel: [180385.555760] Adjust your secret_interval!
Oct 12 12:01:52 spozerl kernel: [180829.114321] dst cache overflow
Oct 12 12:01:52 spozerl kernel: [180829.129033] dst cache overflow
Oct 12 12:01:52 spozerl kernel: [180829.130873] dst cache overflow
Oct 12 12:01:52 spozerl kernel: [180829.139006] dst cache overflow
[...] until the kernel network stack freezes after a while.
To resolve these kernel hangs (loss of network connectivity), I applied Eric Dumazet's patch to the linux-source-2.6.32 package, which resolved the issue on my loaded Debian Squeeze router. The patch is available here:
http://patchwork.ozlabs.org/patch/47114/raw/
It would be great if this patch could be included in the official Debian Squeeze kernel. It may also resolve some of the other strange network hangs described by other users.
-Florian
-- System Information:
Debian Release: 6.0.3
APT prefers stable
APT policy: (500, 'stable')
Architecture: i386 (i686)
Kernel: Linux 2.6.32-fix-route-cache-rebuilds (SMP w/1 CPU core)
Locale: LANG=de_DE.UTF-8, LC_CTYPE=de_DE.UTF-8 (charmap=UTF-8)
Shell: /bin/sh linked to /bin/bash
Versions of packages linux-source-2.6.32 depends on:
ii binutils 2.20.1-16 The GNU assembler, linker and bina
ii bzip2 1.0.5-6 high-quality block-sorting file co
Versions of packages linux-source-2.6.32 recommends:
ii gcc 4:4.4.5-1 The GNU C compiler
ii libc6-dev [libc-dev] 2.11.2-10 Embedded GNU C Library: Developmen
ii make 3.81-8 An utility for Directing compilati
Versions of packages linux-source-2.6.32 suggests:
ii kernel-package 12.036+nmu1 A utility for building Linux kerne
ii libncurses5-dev [ncurses- 5.7+20100313-5 developer's libraries and docs for
pn libqt3-mt-dev <none> (no description available)
-- no debconf information
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index b2ba558..d9b4024 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -146,7 +146,6 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
static void ipv4_link_failure(struct sk_buff *skb);
static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
static int rt_garbage_collect(struct dst_ops *ops);
-static void rt_emergency_hash_rebuild(struct net *net);
static struct dst_ops ipv4_dst_ops = {
@@ -780,11 +779,30 @@ static void rt_do_flush(int process_context)
#define FRACT_BITS 3
#define ONE (1UL << FRACT_BITS)
+/*
+ * Given a hash chain and an item in this hash chain,
+ * find if a previous entry has the same hash_inputs
+ * (but differs on tos, mark or oif)
+ * Returns 0 if an alias is found.
+ * Returns ONE if rth has no alias before itself.
+ */
+static int has_noalias(const struct rtable *head, const struct rtable *rth)
+{
+ const struct rtable *aux = head;
+
+ while (aux != rth) {
+ if (compare_hash_inputs(&aux->fl, &rth->fl))
+ return 0;
+ aux = aux->u.dst.rt_next;
+ }
+ return ONE;
+}
+
static void rt_check_expire(void)
{
static unsigned int rover;
unsigned int i = rover, goal;
- struct rtable *rth, *aux, **rthp;
+ struct rtable *rth, **rthp;
unsigned long samples = 0;
unsigned long sum = 0, sum2 = 0;
unsigned long delta;
@@ -835,15 +853,7 @@ nofree:
* attributes don't unfairly skew
* the length computation
*/
- for (aux = rt_hash_table[i].chain;;) {
- if (aux == rth) {
- length += ONE;
- break;
- }
- if (compare_hash_inputs(&aux->fl, &rth->fl))
- break;
- aux = aux->u.dst.rt_next;
- }
+ length += has_noalias(rt_hash_table[i].chain, rth);
continue;
}
} else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout))
@@ -1073,6 +1083,21 @@ work_done:
out: return 0;
}
+/*
+ * Returns number of entries in a hash chain that have different hash_inputs
+ */
+static int slow_chain_length(const struct rtable *head)
+{
+ int length = 0;
+ const struct rtable *rth = head;
+
+ while (rth) {
+ length += has_noalias(head, rth);
+ rth = rth->u.dst.rt_next;
+ }
+ return length >> FRACT_BITS;
+}
+
static int rt_intern_hash(unsigned hash, struct rtable *rt,
struct rtable **rp, struct sk_buff *skb)
{
@@ -1185,7 +1210,8 @@ restart:
rt_free(cand);
}
} else {
- if (chain_length > rt_chain_length_max) {
+ if (chain_length > rt_chain_length_max &&
+ slow_chain_length(rt_hash_table[hash].chain) > rt_chain_length_max) {
struct net *net = dev_net(rt->u.dst.dev);
int num = ++net->ipv4.current_rt_cache_rebuild_count;
if (!rt_caching(dev_net(rt->u.dst.dev))) {
Reply to: