[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Bug#928771: unblock: sbd/1.4.0-18-g5e3283c-1



Package: release.debian.org
Severity: normal
User: release.debian.org@packages.debian.org
Usertags: unblock

Please unblock package sbd

The new version contains upstream fixes for some use cases and updates
the package tests to work with the Corosync/Pacemaker versions in buster.

unblock sbd/1.4.0-18-g5e3283c-1

-- System Information:
Debian Release: buster/sid
  APT prefers unstable
  APT policy: (500, 'unstable')
Architecture: amd64 (x86_64)

Kernel: Linux 4.19.0-3-amd64 (SMP w/8 CPU cores)
Locale: LANG=en_US.UTF-8, LC_CTYPE=en_US.UTF-8 (charmap=UTF-8), LANGUAGE=en_US.UTF-8 (charmap=UTF-8)
Shell: /bin/sh linked to /bin/bash
Init: systemd (via /run/systemd/system)
LSM: AppArmor: enabled
diff -Nru sbd-1.4.0/debian/changelog sbd-1.4.0-18-g5e3283c/debian/changelog
--- sbd-1.4.0/debian/changelog	2019-01-15 09:25:28.000000000 +0100
+++ sbd-1.4.0-18-g5e3283c/debian/changelog	2019-05-08 10:55:44.000000000 +0200
@@ -1,3 +1,12 @@
+sbd (1.4.0-18-g5e3283c-1) unstable; urgency=medium
+
+  * New upstream version 1.4.0-18-g5e3283c (Closes: #925821)
+  * debian/sbd.lintian-overrides: update manpage line
+  * debian/patches: use /run for PIDFile location
+  * debian/tests: update for corosync v3
+
+ -- Valentin Vidic <vvidic@debian.org>  Wed, 08 May 2019 10:55:44 +0200
+
 sbd (1.4.0-1) unstable; urgency=medium
 
   * New upstream version 1.4.0
diff -Nru sbd-1.4.0/debian/patches/pidfile-in-runstatedir.patch sbd-1.4.0-18-g5e3283c/debian/patches/pidfile-in-runstatedir.patch
--- sbd-1.4.0/debian/patches/pidfile-in-runstatedir.patch	1970-01-01 01:00:00.000000000 +0100
+++ sbd-1.4.0-18-g5e3283c/debian/patches/pidfile-in-runstatedir.patch	2019-05-08 10:55:20.000000000 +0200
@@ -0,0 +1,28 @@
+Description: Use /run for PIDFile location
+ systemd complains if PIDFile uses /var/run
+Author: Valentin Vidic <vvidic@debian.org>
+Last-Update: 2019-04-26
+---
+This patch header follows DEP-3: http://dep.debian.net/deps/dep3/
+--- a/src/sbd.service.in
++++ b/src/sbd.service.in
+@@ -10,7 +10,7 @@
+ 
+ [Service]
+ Type=forking
+-PIDFile=@localstatedir@/run/sbd.pid
++PIDFile=@runstatedir@/sbd.pid
+ EnvironmentFile=-@CONFIGDIR@/sbd
+ ExecStart=@sbindir@/sbd $SBD_OPTS -p @localstatedir@/run/sbd.pid watch
+ ExecStop=@bindir@/kill -TERM $MAINPID
+--- a/src/sbd_remote.service.in
++++ b/src/sbd_remote.service.in
+@@ -8,7 +8,7 @@
+ 
+ [Service]
+ Type=forking
+-PIDFile=@localstatedir@/run/sbd.pid
++PIDFile=@runstatedir@/sbd.pid
+ EnvironmentFile=-@CONFIGDIR@/sbd
+ ExecStart=@sbindir@/sbd $SBD_OPTS -p @localstatedir@/run/sbd.pid watch
+ ExecStop=@bindir@/kill -TERM $MAINPID
diff -Nru sbd-1.4.0/debian/patches/series sbd-1.4.0-18-g5e3283c/debian/patches/series
--- sbd-1.4.0/debian/patches/series	1970-01-01 01:00:00.000000000 +0100
+++ sbd-1.4.0-18-g5e3283c/debian/patches/series	2019-05-08 10:55:20.000000000 +0200
@@ -0,0 +1 @@
+pidfile-in-runstatedir.patch
diff -Nru sbd-1.4.0/debian/sbd.lintian-overrides sbd-1.4.0-18-g5e3283c/debian/sbd.lintian-overrides
--- sbd-1.4.0/debian/sbd.lintian-overrides	2019-01-15 09:12:00.000000000 +0100
+++ sbd-1.4.0-18-g5e3283c/debian/sbd.lintian-overrides	2019-05-08 10:55:01.000000000 +0200
@@ -1 +1 @@
-manpage-has-errors-from-man usr/share/man/man8/sbd.8.gz 185: warning [p 1, 8.7i]: can't break line
+manpage-has-errors-from-man usr/share/man/man8/sbd.8.gz 189: warning [p 1, 8.7i]: can't break line
diff -Nru sbd-1.4.0/debian/tests/control sbd-1.4.0-18-g5e3283c/debian/tests/control
--- sbd-1.4.0/debian/tests/control	2019-01-15 09:12:00.000000000 +0100
+++ sbd-1.4.0-18-g5e3283c/debian/tests/control	2019-05-08 10:55:28.000000000 +0200
@@ -14,10 +14,10 @@
 Restrictions: needs-root, allow-stderr, isolation-machine
 Tests: regression
 
-Depends: @, pacemaker, crmsh
+Depends: @
 Restrictions: needs-root, isolation-machine, breaks-testbed
 Tests: fence-external
 
-Depends: @, pacemaker, crmsh, fence-agents
+Depends: @
 Restrictions: needs-root, isolation-machine, breaks-testbed
 Tests: fence-agents
diff -Nru sbd-1.4.0/debian/tests/fence-agents sbd-1.4.0-18-g5e3283c/debian/tests/fence-agents
--- sbd-1.4.0/debian/tests/fence-agents	2019-01-15 09:12:00.000000000 +0100
+++ sbd-1.4.0-18-g5e3283c/debian/tests/fence-agents	2019-05-08 10:55:28.000000000 +0200
@@ -14,21 +14,24 @@
 LOOP=$(losetup --find --show $DISK)
 
 echo "=== create ==="
+hostname node1 # must match corosync for fence to work
 sbd -d $LOOP create
-echo "SBD_OPTS='-d $LOOP -W -W'" > /etc/default/sbd
+sed -i "s|^#\\?\\(SBD_DEVICE=\\).*|\\1$LOOP|" /etc/default/sbd
+sed -i "s|^\\(SBD_WATCHDOG_DEV=\\).*|\\1/dev/null|" /etc/default/sbd
 
 echo "=== cluster ==="
-service corosync start
-service pacemaker start
-sleep 60
+apt-get --yes --quiet install pacemaker crmsh fence-agents
 service sbd status
-crm status
 
-echo "=== crm ==="
-HOSTNAME=$(uname -n)
-crm configure primitive sbd stonith:fence_sbd params devices=$LOOP plug=$HOSTNAME sbd_path=/usr/sbin/sbd
+echo -n "Waiting for cluster to start... "
+for x in `seq 60 -1 1`; do echo -n "$x "; sleep 1; done; echo
+crm configure primitive sbd stonith:fence_sbd params devices=$LOOP plug=node1 sbd_path=/usr/sbin/sbd
 crm configure show
 
+echo -n "Waiting for resource to start... "
+for x in `seq 10 -1 1`; do echo -n "$x "; sleep 1; done; echo
+crm status
+
 echo "=== fence ==="
 /tmp/autopkgtest-reboot-prepare fenced
-crm --force node fence $HOSTNAME
+crm --force node fence node1
diff -Nru sbd-1.4.0/debian/tests/fence-external sbd-1.4.0-18-g5e3283c/debian/tests/fence-external
--- sbd-1.4.0/debian/tests/fence-external	2019-01-15 09:12:00.000000000 +0100
+++ sbd-1.4.0-18-g5e3283c/debian/tests/fence-external	2019-05-08 10:55:28.000000000 +0200
@@ -14,20 +14,24 @@
 LOOP=$(losetup --find --show $DISK)
 
 echo "=== create ==="
+hostname node1 # must match corosync for fence to work
 sbd -d $LOOP create
-echo "SBD_OPTS='-d $LOOP -W -W'" > /etc/default/sbd
+sed -i "s|^#\\?\\(SBD_DEVICE=\\).*|\\1$LOOP|" /etc/default/sbd
+sed -i "s|^\\(SBD_WATCHDOG_DEV=\\).*|\\1/dev/null|" /etc/default/sbd
 
 echo "=== cluster ==="
-service corosync start
-service pacemaker start
-sleep 60
+apt-get --yes --quiet install pacemaker crmsh
 service sbd status
-crm status
 
-echo "=== crm ==="
+echo -n "Waiting for cluster to start... "
+for x in `seq 60 -1 1`; do echo -n "$x "; sleep 1; done; echo
 crm configure primitive sbd stonith:external/sbd params sbd_device=$LOOP
 crm configure show
 
+echo -n "Waiting for resource to start... "
+for x in `seq 10 -1 1`; do echo -n "$x "; sleep 1; done; echo
+crm status
+
 echo "=== fence ==="
 /tmp/autopkgtest-reboot-prepare fenced
-crm --force node fence $(uname -n)
+crm --force node fence node1
diff -Nru sbd-1.4.0/man/sbd.8.pod sbd-1.4.0-18-g5e3283c/man/sbd.8.pod
--- sbd-1.4.0/man/sbd.8.pod	2019-01-14 14:27:27.000000000 +0100
+++ sbd-1.4.0-18-g5e3283c/man/sbd.8.pod	2019-04-16 14:38:22.000000000 +0200
@@ -493,7 +493,7 @@
 introduce an additional single point of failure then.
 
 If the SBD device is not accessible, the daemon will fail to start and
-inhibit openais startup.
+inhibit startup of cluster services.
 
 =item Two devices
 
diff -Nru sbd-1.4.0/src/sbd-cluster.c sbd-1.4.0-18-g5e3283c/src/sbd-cluster.c
--- sbd-1.4.0/src/sbd-cluster.c	2019-01-14 14:27:27.000000000 +0100
+++ sbd-1.4.0-18-g5e3283c/src/sbd-cluster.c	2019-04-16 14:38:22.000000000 +0200
@@ -174,6 +174,25 @@
     return TRUE;
 }
 
+static void
+cmap_destroy(void)
+{
+    if (cmap_source) {
+        g_source_destroy(cmap_source);
+        cmap_source = NULL;
+    }
+
+    if (track_handle) {
+        cmap_track_delete(cmap_handle, track_handle);
+        track_handle = 0;
+    }
+
+    if (cmap_handle) {
+        cmap_finalize(cmap_handle);
+        cmap_handle = 0;
+    }
+}
+
 static gboolean
 sbd_get_two_node(void)
 {
@@ -217,18 +236,7 @@
     return TRUE;
 
 out:
-    if (cmap_source) {
-        g_source_destroy(cmap_source);
-        cmap_source = NULL;
-    }
-    if (track_handle) {
-        cmap_track_delete(cmap_handle, track_handle);
-        track_handle = 0;
-    }
-    if (cmap_handle) {
-        cmap_finalize(cmap_handle);
-        cmap_handle = 0;
-    }
+    cmap_destroy();
 
     return FALSE;
 }
@@ -327,6 +335,12 @@
 {
     cl_log(LOG_WARNING, "Lost connection to %s", name_for_cluster_type(get_cluster_type()));
 
+    if (get_cluster_type() != pcmk_cluster_unknown) {
+#if SUPPORT_COROSYNC && CHECK_TWO_NODE
+        cmap_destroy();
+#endif
+    }
+
     set_servant_health(pcmk_health_unclean, LOG_ERR, "Cluster connection terminated");
     notify_parent();
 
diff -Nru sbd-1.4.0/src/sbd-common.c sbd-1.4.0-18-g5e3283c/src/sbd-common.c
--- sbd-1.4.0/src/sbd-common.c	2019-01-14 14:27:27.000000000 +0100
+++ sbd-1.4.0-18-g5e3283c/src/sbd-common.c	2019-04-16 14:38:22.000000000 +0200
@@ -568,13 +568,13 @@
 #define IOPRIO_PRIO_DATA(mask)  ((mask) & IOPRIO_PRIO_MASK)
 #define IOPRIO_PRIO_VALUE(class, data)  (((class) << IOPRIO_CLASS_SHIFT) | data)
 
-static unsigned char
+static void
 sbd_stack_hogger(unsigned char * inbuf, int kbytes)
 {
     unsigned char buf[1024];
 
     if(kbytes <= 0) {
-        return HOG_CHAR;
+        return;
     }
 
     if (inbuf == NULL) {
@@ -584,10 +584,10 @@
     }
 
     if (kbytes > 0) {
-        return sbd_stack_hogger(buf, kbytes-1);
-    } else {
-        return buf[sizeof(buf)-1];
+        sbd_stack_hogger(buf, kbytes-1);
     }
+
+    return;
 }
 
 static void
diff -Nru sbd-1.4.0/src/sbd.h sbd-1.4.0-18-g5e3283c/src/sbd.h
--- sbd-1.4.0/src/sbd.h	2019-01-14 14:27:27.000000000 +0100
+++ sbd-1.4.0-18-g5e3283c/src/sbd.h	2019-04-16 14:38:22.000000000 +0200
@@ -54,10 +54,13 @@
 /* FIXME: should add dynamic check of SIG_XX >= SIGRTMAX */
 
 /* exit status for disk-servant */
-#define EXIT_MD_IO_FAIL             20
-#define EXIT_MD_REQUEST_RESET       21
-#define EXIT_MD_REQUEST_SHUTOFF     22
-#define EXIT_MD_REQUEST_CRASHDUMP   23
+#define EXIT_MD_SERVANT_IO_FAIL             20
+#define EXIT_MD_SERVANT_REQUEST_RESET       21
+#define EXIT_MD_SERVANT_REQUEST_SHUTOFF     22
+#define EXIT_MD_SERVANT_REQUEST_CRASHDUMP   23
+
+/* exit status for pcmk-servant */
+#define EXIT_PCMK_SERVANT_GRACEFUL_SHUTDOWN 30
 
 #define HOG_CHAR	0xff
 #define SECTOR_NAME_MAX 63
@@ -175,7 +178,7 @@
 int dump_headers(struct servants_list_item *servants);
 unsigned long get_first_msgwait(struct servants_list_item *servants);
 int messenger(const char *name, const char *msg, struct servants_list_item *servants);
-int servant(const char *diskname, int mode, const void* argp);
+int servant_md(const char *diskname, int mode, const void* argp);
 #endif
 
 int servant_pcmk(const char *diskname, int mode, const void* argp);
diff -Nru sbd-1.4.0/src/sbd-inquisitor.c sbd-1.4.0-18-g5e3283c/src/sbd-inquisitor.c
--- sbd-1.4.0/src/sbd-inquisitor.c	2019-01-14 14:27:27.000000000 +0100
+++ sbd-1.4.0-18-g5e3283c/src/sbd-inquisitor.c	2019-04-16 14:38:22.000000000 +0200
@@ -42,19 +42,36 @@
 	struct servants_list_item *newbie;
 
 	if (lookup_servant_by_dev(devname)) {
-		cl_log(LOG_DEBUG, "Servant %s already exists", devname);
-		return;
+	    cl_log(LOG_DEBUG, "Servant %s already exists", devname);
+	    return;
 	}
 
 	newbie = malloc(sizeof(*newbie));
-	if (!newbie) {
-		fprintf(stderr, "malloc failed in recruit_servant.\n");
-		exit(1);
+	if (newbie) {
+	    memset(newbie, 0, sizeof(*newbie));
+	    newbie->devname = strdup(devname);
+	    newbie->pid = pid;
+	    newbie->first_start = 1;
+	}
+	if (!newbie || !newbie->devname) {
+	    fprintf(stderr, "heap allocation failed in recruit_servant.\n");
+	    exit(1);
+	}
+
+	/* some sanity-check on our newbie */
+	if (sbd_is_disk(newbie)) {
+	    cl_log(LOG_INFO, "Monitoring %s", devname);
+	    disk_count++;
+	} else if (sbd_is_pcmk(newbie) || sbd_is_cluster(newbie)) {
+	    /* alive just after pcmk and cluster servants have shown up */
+	    newbie->outdated = 1;
+	} else {
+	    /* toss our newbie */
+	    cl_log(LOG_ERR, "Refusing to recruit unrecognized servant %s", devname);
+	    free((void *) newbie->devname);
+	    free(newbie);
+	    return;
 	}
-	memset(newbie, 0, sizeof(*newbie));
-	newbie->devname = strdup(devname);
-	newbie->pid = pid;
-	newbie->first_start = 1;
 
 	if (!s) {
 		servants_leader = newbie;
@@ -65,12 +82,6 @@
 	}
 
 	servant_count++;
-        if(sbd_is_disk(newbie)) {
-            cl_log(LOG_INFO, "Monitoring %s", devname);
-            disk_count++;
-        } else {
-            newbie->outdated = 1;
-        }
 }
 
 int assign_servant(const char* devname, functionp_t functionp, int mode, const void* argp)
@@ -148,7 +159,7 @@
 	if (sbd_is_disk(s)) {
 #if SUPPORT_SHARED_DISK
 		DBGLOG(LOG_INFO, "Starting servant for device %s", s->devname);
-		s->pid = assign_servant(s->devname, servant, start_mode, s);
+		s->pid = assign_servant(s->devname, servant_md, start_mode, s);
 #else
                 cl_log(LOG_ERR, "Shared disk functionality not supported");
                 return;
@@ -479,19 +490,19 @@
 					if (sbd_is_disk(s)) {
 						if (WIFEXITED(status)) {
 							switch(WEXITSTATUS(status)) {
-								case EXIT_MD_IO_FAIL:
+								case EXIT_MD_SERVANT_IO_FAIL:
 									DBGLOG(LOG_INFO, "Servant for %s requests to be disowned",
 										s->devname);
 									break;
-								case EXIT_MD_REQUEST_RESET:
+								case EXIT_MD_SERVANT_REQUEST_RESET:
 									cl_log(LOG_WARNING, "%s requested a reset", s->devname);
 									do_reset();
 									break;
-								case EXIT_MD_REQUEST_SHUTOFF:
+								case EXIT_MD_SERVANT_REQUEST_SHUTOFF:
 									cl_log(LOG_WARNING, "%s requested a shutoff", s->devname);
 									do_off();
 									break;
-								case EXIT_MD_REQUEST_CRASHDUMP:
+								case EXIT_MD_SERVANT_REQUEST_CRASHDUMP:
 									cl_log(LOG_WARNING, "%s requested a crashdump", s->devname);
 									do_crashdump();
 									break;
@@ -499,6 +510,22 @@
 									break;
 							}
 						}
+					} else if (sbd_is_pcmk(s)) {
+						if (WIFEXITED(status)) {
+							switch(WEXITSTATUS(status)) {
+								case EXIT_PCMK_SERVANT_GRACEFUL_SHUTDOWN:
+									DBGLOG(LOG_INFO, "PCMK-Servant has exited gracefully");
+									/* revert to state prior to pacemaker-detection */
+									s->restarts = 0;
+									s->restart_blocked = 0;
+									cluster_appeared = 0;
+									s->outdated = 1;
+									s->t_last.tv_sec = 0;
+									break;
+								default:
+									break;
+							}
+						}
 					}
 					cleanup_servant_by_pid(pid);
 				}
@@ -753,54 +780,56 @@
 int
 parse_device_line(const char *line)
 {
-    int lpc = 0;
-    int last = 0;
-    int max = 0;
+    size_t lpc = 0;
+    size_t last = 0;
+    size_t max = 0;
     int found = 0;
+    bool skip_space = true;
+    int space_run = 0;
 
-    if(line) {
-        max = strlen(line);
+    if (!line) {
+        return 0;
     }
 
-    if (max <= 0) {
-        return found;
-    }
+    max = strlen(line);
 
-    cl_log(LOG_DEBUG, "Processing %d bytes: [%s]", max, line);
-    /* Skip initial whitespace */
-    for (lpc = 0; lpc <= max && isspace(line[lpc]); lpc++) {
-        last = lpc + 1;
-    }
+    cl_log(LOG_DEBUG, "Processing %d bytes: [%s]", (int) max, line);
 
-    /* Now the actual content */
     for (lpc = 0; lpc <= max; lpc++) {
-        int a_space = isspace(line[lpc]);
-
-        if (a_space && lpc < max && isspace(line[lpc + 1])) {
-            /* fast-forward to the end of the spaces */
-
-        } else if (a_space || line[lpc] == ';' || line[lpc] == 0) {
-            int rc = 1;
-            char *entry = NULL;
+        if (isspace(line[lpc])) {
+            if (skip_space) {
+                last = lpc + 1;
+            } else {
+                space_run++;
+            }
+            continue;
+        }
+        skip_space = false;
+        if (line[lpc] == ';' || line[lpc] == 0) {
+            int rc = 0;
+            char *entry = calloc(1, 1 + lpc - last);
 
-            if (lpc > last) {
-                entry = calloc(1, 1 + lpc - last);
+            if (entry) {
                 rc = sscanf(line + last, "%[^;]", entry);
+            } else {
+                fprintf(stderr, "Heap allocation failed parsing device-line.\n");
+                exit(1);
             }
 
-            if (entry == NULL) {
-                /* Skip */
-            } else if (rc != 1) {
-                cl_log(LOG_WARNING, "Could not parse (%d %d): %s", last, lpc, line + last);
+            if (rc != 1) {
+                cl_log(LOG_WARNING, "Could not parse: '%s'", line + last);
             } else {
+                entry[strlen(entry)-space_run] = '\0';
                 cl_log(LOG_DEBUG, "Adding '%s'", entry);
                 recruit_servant(entry, 0);
                 found++;
             }
 
             free(entry);
+            skip_space = true;
             last = lpc + 1;
         }
+        space_run = 0;
     }
     return found;
 }
@@ -861,7 +890,7 @@
             int devices = parse_device_line(value);
             if(devices < 1) {
                 fprintf(stderr, "Invalid device line: %s\n", value);
-		exit_status = -2;
+                exit_status = -2;
                 goto out;
             }
 #else
@@ -1059,7 +1088,8 @@
 			break;
 		case 'h':
 			usage();
-			return (0);
+			goto out;
+			break;
 		default:
 			exit_status = -2;
 			goto out;
@@ -1212,6 +1242,9 @@
         }
         
   out:
+	if (timeout_action) {
+				free(timeout_action);
+	}
 	if (exit_status < 0) {
 		if (exit_status == -2) {
 			usage();
diff -Nru sbd-1.4.0/src/sbd-md.c sbd-1.4.0-18-g5e3283c/src/sbd-md.c
--- sbd-1.4.0/src/sbd-md.c	2019-01-14 14:27:27.000000000 +0100
+++ sbd-1.4.0-18-g5e3283c/src/sbd-md.c	2019-04-16 14:38:22.000000000 +0200
@@ -162,9 +162,9 @@
 
 	memset(&st->io, 0, sizeof(struct iocb));
 	if (rw) {
-		io_prep_pwrite(&st->io, st->devfd, data, sector_size, sector_size * sector);
+		io_prep_pwrite(&st->io, st->devfd, data, sector_size, (long long) sector_size * sector);
 	} else {
-		io_prep_pread(&st->io, st->devfd, data, sector_size, sector_size * sector);
+		io_prep_pread(&st->io, st->devfd, data, sector_size, (long long) sector_size * sector);
 	}
 
 	if (io_submit(st->ioctx, 1, ios) != 1) {
@@ -373,7 +373,6 @@
 	struct sector_header_s	*s_header;
 	struct sector_node_s	*s_node;
 	struct sector_mbox_s	*s_mbox;
-	struct stat 		s;
 	char			uuid[37];
 	int			i;
 	int			rc = 0;
@@ -394,10 +393,6 @@
 	uuid_generate(s_header->uuid);
 	uuid_unparse_lower(s_header->uuid, uuid);
 
-	fstat(st->devfd, &s);
-	/* printf("st_size = %ld, st_blksize = %ld, st_blocks = %ld\n",
-			s.st_size, s.st_blksize, s.st_blocks); */
-
 	cl_log(LOG_INFO, "Creating version %d.%d header on device %d (uuid: %s)",
 			s_header->version, s_header->minor_version,
 			st->devfd, uuid);
@@ -1031,7 +1026,7 @@
 	return 0;
 }
 
-int servant(const char *diskname, int mode, const void* argp)
+int servant_md(const char *diskname, int mode, const void* argp)
 {
 	struct sector_mbox_s *s_mbox = NULL;
 	struct sector_node_s *s_node = NULL;
@@ -1046,11 +1041,6 @@
 	char uuid[37];
 	const struct servants_list_item *s = argp;
 
-	if (!diskname) {
-		cl_log(LOG_ERR, "Empty disk name %s.", diskname);
-		return -1;
-	}
-
 	cl_log(LOG_INFO, "Servant starting for device %s", diskname);
 
 	/* Block most of the signals */
@@ -1066,19 +1056,19 @@
 
 	st = open_device(diskname, LOG_WARNING);
 	if (!st) {
-		exit(EXIT_MD_IO_FAIL);
+		exit(EXIT_MD_SERVANT_IO_FAIL);
 	}
 
 	s_header = header_get(st);
 	if (!s_header) {
 		cl_log(LOG_ERR, "Not a valid header on %s", diskname);
-		exit(EXIT_MD_IO_FAIL);
+		exit(EXIT_MD_SERVANT_IO_FAIL);
 	}
 
 	if (servant_check_timeout_inconsistent(s_header) < 0) {
 		cl_log(LOG_ERR, "Timeouts on %s do not match first device",
 				diskname);
-		exit(EXIT_MD_IO_FAIL);
+		exit(EXIT_MD_SERVANT_IO_FAIL);
 	}
 
 	if (s_header->minor_version > 0) {
@@ -1091,14 +1081,14 @@
 		cl_log(LOG_ERR,
 		       "No slot allocated, and automatic allocation failed for disk %s.",
 		       diskname);
-		rc = EXIT_MD_IO_FAIL;
+		rc = EXIT_MD_SERVANT_IO_FAIL;
 		goto out;
 	}
 	s_node = sector_alloc();
 	if (slot_read(st, mbox, s_node) < 0) {
 		cl_log(LOG_ERR, "Unable to read node entry on %s",
 				diskname);
-		exit(EXIT_MD_IO_FAIL);
+		exit(EXIT_MD_SERVANT_IO_FAIL);
 	}
 
 	cl_log(LOG_NOTICE, "Monitoring slot %d on disk %s", mbox, diskname);
@@ -1114,7 +1104,7 @@
 		if (mode > 0) {
 			if (mbox_read(st, mbox, s_mbox) < 0) {
 				cl_log(LOG_ERR, "mbox read failed during start-up in servant.");
-				rc = EXIT_MD_IO_FAIL;
+				rc = EXIT_MD_SERVANT_IO_FAIL;
 				goto out;
 			}
 			if (s_mbox->cmd != SBD_MSG_EXIT &&
@@ -1130,7 +1120,7 @@
 		DBGLOG(LOG_INFO, "First servant start - zeroing inbox");
 		memset(s_mbox, 0, sizeof(*s_mbox));
 		if (mbox_write(st, mbox, s_mbox) < 0) {
-			rc = EXIT_MD_IO_FAIL;
+			rc = EXIT_MD_SERVANT_IO_FAIL;
 			goto out;
 		}
 	}
@@ -1159,28 +1149,28 @@
 		s_header_retry = header_get(st);
 		if (!s_header_retry) {
 			cl_log(LOG_ERR, "No longer found a valid header on %s", diskname);
-			exit(EXIT_MD_IO_FAIL);
+			exit(EXIT_MD_SERVANT_IO_FAIL);
 		}
 		if (memcmp(s_header, s_header_retry, sizeof(*s_header)) != 0) {
 			cl_log(LOG_ERR, "Header on %s changed since start-up!", diskname);
-			exit(EXIT_MD_IO_FAIL);
+			exit(EXIT_MD_SERVANT_IO_FAIL);
 		}
 		free(s_header_retry);
 
 		s_node_retry = sector_alloc();
 		if (slot_read(st, mbox, s_node_retry) < 0) {
 			cl_log(LOG_ERR, "slot read failed in servant.");
-			exit(EXIT_MD_IO_FAIL);
+			exit(EXIT_MD_SERVANT_IO_FAIL);
 		}
 		if (memcmp(s_node, s_node_retry, sizeof(*s_node)) != 0) {
 			cl_log(LOG_ERR, "Node entry on %s changed since start-up!", diskname);
-			exit(EXIT_MD_IO_FAIL);
+			exit(EXIT_MD_SERVANT_IO_FAIL);
 		}
 		free(s_node_retry);
 
 		if (mbox_read(st, mbox, s_mbox) < 0) {
 			cl_log(LOG_ERR, "mbox read failed in servant.");
-			exit(EXIT_MD_IO_FAIL);
+			exit(EXIT_MD_SERVANT_IO_FAIL);
 		}
 
 		if (s_mbox->cmd > 0) {
@@ -1195,14 +1185,14 @@
 				sigqueue(ppid, SIG_TEST, signal_value);
 				break;
 			case SBD_MSG_RESET:
-				exit(EXIT_MD_REQUEST_RESET);
+				exit(EXIT_MD_SERVANT_REQUEST_RESET);
 			case SBD_MSG_OFF:
-				exit(EXIT_MD_REQUEST_SHUTOFF);
+				exit(EXIT_MD_SERVANT_REQUEST_SHUTOFF);
 			case SBD_MSG_EXIT:
 				sigqueue(ppid, SIG_EXITREQ, signal_value);
 				break;
 			case SBD_MSG_CRASHDUMP:
-				exit(EXIT_MD_REQUEST_CRASHDUMP);
+				exit(EXIT_MD_SERVANT_REQUEST_CRASHDUMP);
 			default:
 				/* FIXME:
 				   An "unknown" message might result
diff -Nru sbd-1.4.0/src/sbd-pacemaker.c sbd-1.4.0-18-g5e3283c/src/sbd-pacemaker.c
--- sbd-1.4.0/src/sbd-pacemaker.c	2019-01-14 14:27:27.000000000 +0100
+++ sbd-1.4.0-18-g5e3283c/src/sbd-pacemaker.c	2019-04-16 14:38:22.000000000 +0200
@@ -103,6 +103,9 @@
 
 static long last_refresh = 0;
 
+static int pcmk_clean_shutdown = 0;
+static int pcmk_shutdown = 0;
+
 static gboolean
 mon_timer_reconnect(gpointer data)
 {
@@ -128,10 +131,26 @@
 {
 	if (cib) {
 		cib->cmds->signoff(cib);
+		/* retrigger as last one might have been skipped */
+		mon_refresh_state(NULL);
+		if (pcmk_clean_shutdown) {
+			/* assume a graceful pacemaker-shutdown */
+			clean_up(EXIT_PCMK_SERVANT_GRACEFUL_SHUTDOWN);
+		}
+		/* getting here we aren't sure about the pacemaker-state
+		   so try to use the timeout to reconnect and get
+		   everything sorted out again
+		 */
+		pcmk_shutdown = 0;
 		set_servant_health(pcmk_health_transient, LOG_WARNING, "Disconnected from CIB");
 		timer_id_reconnect = g_timeout_add(reconnect_msec, mon_timer_reconnect, NULL);
 	}
 	cib_connected = 0;
+	/* no sense in looking into outdated cib, trying to apply patch, ... */
+	if (current_cib) {
+		free_xml(current_cib);
+		current_cib = NULL;
+	}
 	return;
 }
 
@@ -171,7 +190,7 @@
 mon_timer_notify(gpointer data)
 {
 	static int counter = 0;
-	int counter_max = timeout_watchdog / timeout_loop;
+	int counter_max = timeout_watchdog / timeout_loop / 2;
 
 	if (timer_id_notify > 0) {
 		g_source_remove(timer_id_notify);
@@ -257,7 +276,7 @@
     static int updates = 0;
     static int ever_had_quorum = FALSE;
 
-    node_t *node = pe_find_node(data_set->nodes, local_uname);
+    node_t *node = NULL;
 
     updates++;
 
@@ -267,11 +286,15 @@
         return;
     }
 
+    node = pe_find_node(data_set->nodes, local_uname);
 
-    if (node == NULL) {
+    if ((node == NULL) || (node->details == NULL)) {
         set_servant_health(pcmk_health_unknown, LOG_WARNING, "Node state: %s is UNKNOWN", local_uname);
+        notify_parent();
+        return;
+    }
 
-    } else if (node->details->online == FALSE) {
+    if (node->details->online == FALSE) {
         set_servant_health(pcmk_health_unknown, LOG_WARNING, "Node state: OFFLINE");
 
     } else if (node->details->unclean) {
@@ -280,11 +303,6 @@
     } else if (node->details->pending) {
         set_servant_health(pcmk_health_pending, LOG_WARNING, "Node state: pending");
 
-#if 0
-    } else if (node->details->shutdown) {
-        set_servant_health(pcmk_health_shutdown, LOG_WARNING, "Node state: shutting down");
-#endif
-
     } else if (data_set->flags & pe_flag_have_quorum) {
         set_servant_health(pcmk_health_online, LOG_INFO, "Node state: online");
         ever_had_quorum = TRUE;
@@ -315,6 +333,12 @@
         }
     }
 
+    if (node->details->shutdown) {
+        pcmk_shutdown = 1;
+    }
+    if (pcmk_shutdown && !(node->details->running_rsc)) {
+        pcmk_clean_shutdown = 1;
+    }
     notify_parent();
     return;
 }
@@ -339,7 +363,7 @@
         static mainloop_timer_t *refresh_timer = NULL;
 
         if(refresh_timer == NULL) {
-            refresh_timer = mainloop_timer_add("refresh", 2000, FALSE, mon_trigger_refresh, NULL);
+            refresh_timer = mainloop_timer_add("refresh", reconnect_msec, FALSE, mon_trigger_refresh, NULL);
             refresh_trigger = mainloop_add_trigger(G_PRIORITY_LOW, mon_refresh_state, refresh_timer);
         }
 
@@ -369,9 +393,9 @@
 	}
 
     /* Refresh
-     * - immediately if the last update was more than 5s ago
+     * - immediately if the last update was more than 1s ago
      * - every 10 updates
-     * - at most 2s after the last update
+     * - at most 1s after the last update
      */
     if (updates > 10 || (now - last_refresh) > (reconnect_msec / 1000)) {
         mon_refresh_state(refresh_timer);
diff -Nru sbd-1.4.0/src/sbd.sysconfig sbd-1.4.0-18-g5e3283c/src/sbd.sysconfig
--- sbd-1.4.0/src/sbd.sysconfig	2019-01-14 14:27:27.000000000 +0100
+++ sbd-1.4.0-18-g5e3283c/src/sbd.sysconfig	2019-04-16 14:38:22.000000000 +0200
@@ -68,6 +68,9 @@
 # If your sbd device(s) reside on a multipath setup or iSCSI, this
 # should be the time required to detect a path failure.
 #
+# Be aware that watchdog timeout set in the on-disk metadata takes
+# precedence.
+#
 SBD_WATCHDOG_TIMEOUT=5
 
 ## Type: string

Reply to: