Re: Bug#486071: Pre-approval for heartbeat testing-proposed-updates upload (2.1.3-6lenny1)
On Fri, Feb 13, 2009 at 11:36:11AM +0100, Philipp Kern wrote:
> On Fri, Feb 13, 2009 at 10:54:02AM +0100, Luk Claes wrote:
> > Simon Horman wrote:
> > > I would like to upload a(nother) fresh version of heartbeat to fix
> > > a fairly severe bug. The fix has been in unstable since 2.1.3-7 and
> > > was included upstream at around the same time - it seems to be well tested.
> > Unfortunately this is too late to make it. Please request to include it
> > in r1 after the release, TIA.
>
> Oh and please attach a debdiff from the version in Lenny to the one with
> your fix to the bug report. Your upload should target
> 'stable-proposed-updates'. TIA.
Thanks, as Lenny has now been released I have made the upload.
The debdiff is below and this email should record it in the BTS.
--
Simon Horman
VA Linux Systems Japan K.K., Sydney, Australia Satellite Office
H: www.vergenet.net/~horms/ W: www.valinux.co.jp/en
diff -u heartbeat-2.1.3/version.Debian heartbeat-2.1.3/version.Debian
--- heartbeat-2.1.3/version.Debian
+++ heartbeat-2.1.3/version.Debian
@@ -1 +1 @@
-2.1.3-5
+2.1.3-6lenny1
diff -u heartbeat-2.1.3/debian/changelog heartbeat-2.1.3/debian/changelog
--- heartbeat-2.1.3/debian/changelog
+++ heartbeat-2.1.3/debian/changelog
@@ -1,3 +1,14 @@
+heartbeat (2.1.3-6lenny1) stable-proposed-updates; urgency=low
+
+ * dopd: fix basic failover; fix hb message corruption by fprintf(stderr)
+ Patch: dopd-fix-basic-failover-fix-hb-message-corruption-by-fprintf_stderr_.patch
+ Upstream-Status: commit 47f60bebe7b25abd88ea7b5488e66dfe187416ae
+ "dopd: fix basic failover; fix hb message corruption by
+ fprintf(stderr)"
+ (closes: #486071)
+
+ -- Simon Horman <horms@debian.org> Mon, 16 Feb 2009 02:54:43 +0000
+
heartbeat (2.1.3-6lenny0) testing-proposed-updates; urgency=low
* heartbeat-gui dependancy on python-xml
only in patch2:
unchanged:
--- heartbeat-2.1.3.orig/debian/patches/dopd-fix-basic-failover-fix-hb-message-corruption-by-fprintf_stderr_.patch
+++ heartbeat-2.1.3/debian/patches/dopd-fix-basic-failover-fix-hb-message-corruption-by-fprintf_stderr_.patch
@@ -0,0 +1,110 @@
+# HG changeset patch
+# User Rasto Levrinc <rasto@linbit.com>
+# Date 1206539836 -3600
+# Node ID 47f60bebe7b25abd88ea7b5488e66dfe187416ae
+# Parent 17c0cf487322287d0689a036c32f21b900ce5a80
+dopd: fix basic failover; fix hb message corruption by fprintf(stderr)
+
+check_drbd_peer() used to return FALSE for "node name not in node list",
+so drbd-peer-outdater returned "invalid nodename".
+Then the semantics changed, and check_drbd_peer learned about "dead" peers
+and returned FALSE for them as well, which made basic failover impossible :(
+
+The return code was now changed to "peer unreachable" for a dead peer.
+And even for nodes which really are not in the host list (and thus could be
+classified as invalid), because, after all, that's what they are:
+unreachable.
+
+Node name comparison needs to be case insensitive; fixed.
+
+During testing with 15 concurrent drbd resources several dopd crashes have been
+observed, which after some debugging turned out to be simply a wrong assumption
+about the global availability of stderr: some fprintf(stderr, "debug message")
+had accidentally used the heartbeat communication channel file descriptor,
+which seriously confused the comm layer.
+All those fprintfs have now been changed to use cl_log.
+
+diff -r 17c0cf487322 -r 47f60bebe7b2 contrib/drbd-outdate-peer/dopd.c
+--- a/contrib/drbd-outdate-peer/dopd.c Mon Mar 24 16:14:12 2008 +0100
++++ b/contrib/drbd-outdate-peer/dopd.c Wed Mar 26 14:57:16 2008 +0100
+@@ -202,14 +202,17 @@
+ }
+
+ /* check_drbd_peer()
+- * walk the nodes and return TRUE if peer is not this node and it exists.
++ * walk the nodes and return
++ * FALSE if peer is not found, not a "normal" node, or "dead"
++ * (no point in trying to reach those nodes).
++ * TRUE if peer is found to be alive and "normal".
+ */
+ gboolean
+ check_drbd_peer(const char *drbd_peer)
+ {
+ const char *node;
+ gboolean found = FALSE;
+- if (!strcmp(drbd_peer, node_name)) {
++ if (!strcasecmp(drbd_peer, node_name)) {
+ cl_log(LOG_WARNING, "drbd peer node %s is me!\n", drbd_peer);
+ return FALSE;
+ }
+@@ -306,9 +309,9 @@
+ } else
+ pthread_mutex_unlock(&conn_mutex);
+ } else {
+- /* wrong peer was specified,
+- send return code 20 to the client */
+- send_to_client(curr_client, "20");
++ /* peer "dead" or not in node list.
++ * return "peer could not be reached" */
++ send_to_client(curr_client, "5");
+ }
+
+ ha_msg_del(msg);
+diff -r 17c0cf487322 -r 47f60bebe7b2 contrib/drbd-outdate-peer/drbd-peer-outdater.c
+--- a/contrib/drbd-outdate-peer/drbd-peer-outdater.c Mon Mar 24 16:14:12 2008 +0100
++++ b/contrib/drbd-outdate-peer/drbd-peer-outdater.c Wed Mar 26 14:57:16 2008 +0100
+@@ -76,7 +76,7 @@
+
+ msg = msgfromIPC_noauth(server);
+ if (!msg) {
+- fprintf(stderr, "no message from server or other "
++ cl_log(LOG_WARNING, "no message from server or other "
+ "instance is running\n");
+ if (client->mainloop != NULL &&
+ g_main_is_running(client->mainloop))
+@@ -92,7 +92,7 @@
+ errno = 0;
+ rc = strtol(rc_string, &ep, 10);
+ if (errno != 0 || *ep != EOS) {
+- fprintf(stderr, "unknown message: %s from server", rc_string);
++ cl_log(LOG_WARNING, "unknown message: %s from server", rc_string);
+ client->rc = 20; /* "officially undefined", unspecified error */
+ ha_msg_del(msg);
+ if (client->mainloop != NULL &&
+@@ -124,7 +124,7 @@
+ outdater_timeout_dispatch(gpointer user_data)
+ {
+ dop_client_t *client = (dop_client_t *)user_data;
+- fprintf(stderr, "error: could not connect to dopd after %i seconds"
++ cl_log(LOG_WARNING, "error: could not connect to dopd after %i seconds"
+ ": timeout reached\n", client->timeout);
+ if (client->mainloop != NULL && g_main_is_running(client->mainloop))
+ g_main_quit(client->mainloop);
+@@ -255,7 +255,7 @@
+ (gpointer)new_client, &ipc_server);
+
+ if (ipc_server == NULL) {
+- fprintf(stderr, "Could not connect to "T_OUTDATER" channel\n");
++ cl_log(LOG_WARNING, "Could not connect to "T_OUTDATER" channel\n");
+ dop_exit(new_client); /* unreachable */
+ }
+
+@@ -267,7 +267,7 @@
+ ha_msg_add(update, F_OUTDATER_RES, drbd_resource);
+
+ if (msg2ipcchan(update, ipc_server) != HA_OK) {
+- fprintf(stderr, "Could not send message\n");
++ cl_log(LOG_WARNING, "Could not send message\n");
+ dop_exit(new_client);
+ }
+
only in patch2:
unchanged:
--- heartbeat-2.1.3.orig/debian/patches/series/2.1.3-6lenny1
+++ heartbeat-2.1.3/debian/patches/series/2.1.3-6lenny1
@@ -0,0 +1 @@
++ dopd-fix-basic-failover-fix-hb-message-corruption-by-fprintf_stderr_.patch
only in patch2:
unchanged:
--- heartbeat-2.1.3.orig/contrib/drbd-outdate-peer/drbd-peer-outdater.c
+++ heartbeat-2.1.3/contrib/drbd-outdate-peer/drbd-peer-outdater.c
@@ -76,7 +76,7 @@
msg = msgfromIPC_noauth(server);
if (!msg) {
- fprintf(stderr, "no message from server or other "
+ cl_log(LOG_WARNING, "no message from server or other "
"instance is running\n");
if (client->mainloop != NULL &&
g_main_is_running(client->mainloop))
@@ -92,7 +92,7 @@
errno = 0;
rc = strtol(rc_string, &ep, 10);
if (errno != 0 || *ep != EOS) {
- fprintf(stderr, "unknown message: %s from server", rc_string);
+ cl_log(LOG_WARNING, "unknown message: %s from server", rc_string);
client->rc = 20; /* "officially undefined", unspecified error */
ha_msg_del(msg);
if (client->mainloop != NULL &&
@@ -124,7 +124,7 @@
outdater_timeout_dispatch(gpointer user_data)
{
dop_client_t *client = (dop_client_t *)user_data;
- fprintf(stderr, "error: could not connect to dopd after %i seconds"
+ cl_log(LOG_WARNING, "error: could not connect to dopd after %i seconds"
": timeout reached\n", client->timeout);
if (client->mainloop != NULL && g_main_is_running(client->mainloop))
g_main_quit(client->mainloop);
@@ -255,7 +255,7 @@
(gpointer)new_client, &ipc_server);
if (ipc_server == NULL) {
- fprintf(stderr, "Could not connect to "T_OUTDATER" channel\n");
+ cl_log(LOG_WARNING, "Could not connect to "T_OUTDATER" channel\n");
dop_exit(new_client); /* unreachable */
}
@@ -267,7 +267,7 @@
ha_msg_add(update, F_OUTDATER_RES, drbd_resource);
if (msg2ipcchan(update, ipc_server) != HA_OK) {
- fprintf(stderr, "Could not send message\n");
+ cl_log(LOG_WARNING, "Could not send message\n");
dop_exit(new_client);
}
only in patch2:
unchanged:
--- heartbeat-2.1.3.orig/contrib/drbd-outdate-peer/dopd.c
+++ heartbeat-2.1.3/contrib/drbd-outdate-peer/dopd.c
@@ -202,14 +202,17 @@
}
/* check_drbd_peer()
- * walk the nodes and return TRUE if peer is not this node and it exists.
+ * walk the nodes and return
+ * FALSE if peer is not found, not a "normal" node, or "dead"
+ * (no point in trying to reach those nodes).
+ * TRUE if peer is found to be alive and "normal".
*/
gboolean
check_drbd_peer(const char *drbd_peer)
{
const char *node;
gboolean found = FALSE;
- if (!strcmp(drbd_peer, node_name)) {
+ if (!strcasecmp(drbd_peer, node_name)) {
cl_log(LOG_WARNING, "drbd peer node %s is me!\n", drbd_peer);
return FALSE;
}
@@ -306,9 +309,9 @@
} else
pthread_mutex_unlock(&conn_mutex);
} else {
- /* wrong peer was specified,
- send return code 20 to the client */
- send_to_client(curr_client, "20");
+ /* peer "dead" or not in node list.
+ * return "peer could not be reached" */
+ send_to_client(curr_client, "5");
}
ha_msg_del(msg);
Reply to: