Bug#861283: unblock: slurm-llnl/16.05.9-1



Package: release.debian.org
Severity: normal
User: release.debian.org@packages.debian.org
Usertags: unblock

Slurm 16.05.9-1, a bug-fix release, was uploaded to unstable a while ago.
The diff is large, but it consists of many targeted fixes (see the summary
in upstream's NEWS file), and Slurm minor releases have always been
considered safe. Moreover, 16.05.9-1 has now been in unstable for a while
without any reported issues.

Can you please consider unblocking slurm-llnl?

-- System Information:
Debian Release: 9.0
  APT prefers testing
  APT policy: (990, 'testing'), (500, 'unstable'), (1, 'experimental')
Architecture: amd64
 (x86_64)
Foreign Architectures: i386

Kernel: Linux 4.9.0-2-amd64 (SMP w/4 CPU cores)
Locale: LANG=en_US.UTF-8, LC_CTYPE=en_US.UTF-8 (charmap=UTF-8)
Shell: /bin/sh linked to /bin/dash
Init: systemd (via /run/systemd/system)
diff -Nru slurm-llnl-16.05.8/debian/changelog slurm-llnl-16.05.9/debian/changelog
--- slurm-llnl-16.05.8/debian/changelog	2017-01-07 02:40:23.000000000 +0100
+++ slurm-llnl-16.05.9/debian/changelog	2017-02-03 09:50:02.000000000 +0100
@@ -1,3 +1,10 @@
+slurm-llnl (16.05.9-1) unstable; urgency=medium
+
+  * New upstream release
+  * Overrides spelling-error-in-binary false positives
+
+ -- Gennaro Oliva <oliva.g@na.icar.cnr.it>  Fri, 03 Feb 2017 09:50:02 +0100
+
 slurm-llnl (16.05.8-1) unstable; urgency=medium
 
   * New upstream release 
diff -Nru slurm-llnl-16.05.8/debian/libslurm30.lintian-overrides slurm-llnl-16.05.9/debian/libslurm30.lintian-overrides
--- slurm-llnl-16.05.8/debian/libslurm30.lintian-overrides	2017-01-04 23:42:58.000000000 +0100
+++ slurm-llnl-16.05.9/debian/libslurm30.lintian-overrides	2017-02-02 09:41:24.000000000 +0100
@@ -12,3 +12,4 @@
 # This happens because slurm_job_preempt_mode is contained in
 # /usr/sbin/slurmctld and will never be referenced when running sinfo.
 hardening-no-bindnow
+spelling-error-in-binary
diff -Nru slurm-llnl-16.05.8/debian/libslurmdb30.lintian-overrides slurm-llnl-16.05.9/debian/libslurmdb30.lintian-overrides
--- slurm-llnl-16.05.8/debian/libslurmdb30.lintian-overrides	2017-01-04 23:42:58.000000000 +0100
+++ slurm-llnl-16.05.9/debian/libslurmdb30.lintian-overrides	2017-02-02 09:41:24.000000000 +0100
@@ -12,3 +12,4 @@
 # This happens because slurm_job_preempt_mode is contained in
 # /usr/sbin/slurmctld and will never be referenced when running sinfo.
 hardening-no-bindnow
+spelling-error-in-binary
diff -Nru slurm-llnl-16.05.8/debian/slurm-client-emulator.lintian-overrides slurm-llnl-16.05.9/debian/slurm-client-emulator.lintian-overrides
--- slurm-llnl-16.05.8/debian/slurm-client-emulator.lintian-overrides	2017-01-04 23:42:58.000000000 +0100
+++ slurm-llnl-16.05.9/debian/slurm-client-emulator.lintian-overrides	2017-02-02 09:41:24.000000000 +0100
@@ -1 +1,2 @@
 slurm-client-emulator: hardening-no-bindnow
+spelling-error-in-binary
diff -Nru slurm-llnl-16.05.8/debian/slurm-client.lintian-overrides slurm-llnl-16.05.9/debian/slurm-client.lintian-overrides
--- slurm-llnl-16.05.8/debian/slurm-client.lintian-overrides	2017-01-04 23:42:58.000000000 +0100
+++ slurm-llnl-16.05.9/debian/slurm-client.lintian-overrides	2017-02-02 09:41:24.000000000 +0100
@@ -1,3 +1,4 @@
 slurm-client: manpage-has-errors-from-man
 slurm-client: conflicts-with-version
 slurm-client: hardening-no-bindnow
+spelling-error-in-binary
diff -Nru slurm-llnl-16.05.8/debian/slurmctld.lintian-overrides slurm-llnl-16.05.9/debian/slurmctld.lintian-overrides
--- slurm-llnl-16.05.8/debian/slurmctld.lintian-overrides	2017-01-04 23:42:58.000000000 +0100
+++ slurm-llnl-16.05.9/debian/slurmctld.lintian-overrides	2017-02-02 09:41:24.000000000 +0100
@@ -1,2 +1,3 @@
 slurmctld: possible-documentation-but-no-doc-base-registration
 slurmctld: hardening-no-bindnow
+spelling-error-in-binary
diff -Nru slurm-llnl-16.05.8/debian/slurmdbd.lintian-overrides slurm-llnl-16.05.9/debian/slurmdbd.lintian-overrides
--- slurm-llnl-16.05.8/debian/slurmdbd.lintian-overrides	2017-01-04 23:42:58.000000000 +0100
+++ slurm-llnl-16.05.9/debian/slurmdbd.lintian-overrides	2017-02-02 09:41:24.000000000 +0100
@@ -1 +1,2 @@
 slurmdbd: hardening-no-bindnow
+spelling-error-in-binary
diff -Nru slurm-llnl-16.05.8/debian/slurmd.lintian-overrides slurm-llnl-16.05.9/debian/slurmd.lintian-overrides
--- slurm-llnl-16.05.8/debian/slurmd.lintian-overrides	2017-01-04 23:42:58.000000000 +0100
+++ slurm-llnl-16.05.9/debian/slurmd.lintian-overrides	2017-02-02 09:41:24.000000000 +0100
@@ -1 +1,2 @@
 slurmd: hardening-no-bindnow
+spelling-error-in-binary
diff -Nru slurm-llnl-16.05.8/debian/slurm-wlm-emulator.lintian-overrides slurm-llnl-16.05.9/debian/slurm-wlm-emulator.lintian-overrides
--- slurm-llnl-16.05.8/debian/slurm-wlm-emulator.lintian-overrides	2017-01-04 23:42:58.000000000 +0100
+++ slurm-llnl-16.05.9/debian/slurm-wlm-emulator.lintian-overrides	2017-02-02 09:41:24.000000000 +0100
@@ -1 +1,2 @@
 slurm-wlm-emulator: hardening-no-bindnow
+spelling-error-in-binary
diff -Nru slurm-llnl-16.05.8/debian/sview.lintian-overrides slurm-llnl-16.05.9/debian/sview.lintian-overrides
--- slurm-llnl-16.05.8/debian/sview.lintian-overrides	2017-01-04 23:42:58.000000000 +0100
+++ slurm-llnl-16.05.9/debian/sview.lintian-overrides	2017-02-02 09:41:24.000000000 +0100
@@ -1 +1,2 @@
 sview: hardening-no-bindnow
+spelling-error-in-binary
diff -Nru slurm-llnl-16.05.8/doc/html/prolog_epilog.shtml slurm-llnl-16.05.9/doc/html/prolog_epilog.shtml
--- slurm-llnl-16.05.8/doc/html/prolog_epilog.shtml	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/doc/html/prolog_epilog.shtml	2017-01-31 20:56:34.000000000 +0100
@@ -130,7 +130,7 @@
 		</tr>
 	</tbody></table>
 </center>
-
+<br>
 <p>This second table below identifies what prologs and epilogs are available for job
 step allocations, when and where they run.</p>
 
diff -Nru slurm-llnl-16.05.8/doc/html/publications.shtml slurm-llnl-16.05.9/doc/html/publications.shtml
--- slurm-llnl-16.05.8/doc/html/publications.shtml	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/doc/html/publications.shtml	2017-01-31 20:56:34.000000000 +0100
@@ -305,6 +305,8 @@
 Yiannis Georgiou and David Glesser (Bull),
 Krzysztof Rzadca (University of Warsaw),
 Denis Trystram (University Grenoble-Alpes)</li>
+
+<li><a href="SUG14/data_movement.pdf">High Performance Data movement between Lustre and Enterprise storage systems</a>
 Aamir Rashid (Terascala)</li>
 
 <li><a href="SUG14/remote_gpu.pdf">Extending Slurm with Support for Remote GPU Virtualization</a>
@@ -775,6 +777,6 @@
 Learning Chef: Compute Cluster with Slurm</a>
 A Slurm Cookbook by Adam DeConinck</p>
 
-<p style="text-align:center;">Last modified 29 November 2016</p>
+<p style="text-align:center;">Last modified 12 January 2017</p>
 
 <!--#include virtual="footer.txt"-->
diff -Nru slurm-llnl-16.05.8/doc/html/reset.css slurm-llnl-16.05.9/doc/html/reset.css
--- slurm-llnl-16.05.8/doc/html/reset.css	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/doc/html/reset.css	2017-01-31 20:56:34.000000000 +0100
@@ -6,7 +6,7 @@
 b, u, i, center,
 ol, ul, li,
 fieldset, form, label, legend,
-table, caption, tbody, tfoot, thead, tr, th, td,
+caption, tbody, tfoot, thead, th,
 article, aside, canvas, details, embed,
 figure, figcaption, footer, header, hgroup,
 menu, nav, output, ruby, section, summary,
@@ -44,6 +44,5 @@
 }
 
 table {
-	border-collapse: collapse;
 	border-spacing: 0;
 }
diff -Nru slurm-llnl-16.05.8/doc/html/style.css slurm-llnl-16.05.9/doc/html/style.css
--- slurm-llnl-16.05.8/doc/html/style.css	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/doc/html/style.css	2017-01-31 20:56:34.000000000 +0100
@@ -23,7 +23,6 @@
 .container {
 	margin: 0 auto;
 	padding: 0 18px;
-	max-width: 1400px;
 }
 
 .container--main {
@@ -661,6 +660,7 @@
 @media screen and (min-width: 32em) {
 	.container {
 		padding: 0 36px;
+		max-width: 100%;
 	}
 }
 
@@ -673,6 +673,7 @@
 
 	.container {
 		padding: 0 48px;
+		max-width: 90%;
 	}
 
 	.container--main {
@@ -732,7 +733,7 @@
 	}
 
 	.content .container {
-		padding: 0 8% 0 8%;
+		padding: 0 0 0 100px;
 		margin: 0;
 	}
 
@@ -772,6 +773,9 @@
 
 /* Extra Large Size */
 @media screen and (min-width: 78em) {
-
+	.container {
+		padding: 0 48px;
+		max-width: 90%;
+	}
 }
 
diff -Nru slurm-llnl-16.05.8/META slurm-llnl-16.05.9/META
--- slurm-llnl-16.05.8/META	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/META	2017-01-31 20:56:34.000000000 +0100
@@ -7,8 +7,8 @@
   Name:		slurm
   Major:	16
   Minor:	05
-  Micro:	8
-  Version:	16.05.8
+  Micro:	9
+  Version:	16.05.9
   Release:	1
 # Include leading zero for all pre-releases
 
diff -Nru slurm-llnl-16.05.8/NEWS slurm-llnl-16.05.9/NEWS
--- slurm-llnl-16.05.8/NEWS	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/NEWS	2017-01-31 20:56:34.000000000 +0100
@@ -1,6 +1,46 @@
 This file describes changes in recent versions of Slurm. It primarily
 documents those changes that are of interest to users and administrators.
 
+* Changes in Slurm 16.05.9
+==========================
+ -- Fix parsing of SBCAST_COMPRESS environment variable in sbcast.
+ -- Change some debug messages to errors in task/cgroup plugin.
+ -- backfill scheduler: Stop trying to determine expected start time for a job
+    after 2 seconds of wall time. This can happen if there are many running jobs
+    and a pending job can not be started soon.
+ -- Improve performance of cr_sort_part_rows() in cons_res plugin.
+ -- CRAY - Fix deadlock issue when updating accounting in the slurmctld and
+    scheduling a DataWarp job.
+ -- Correct the job state accounting information for jobs requeued due to burst
+    buffer errors.
+ -- burst_buffer/cray - Avoid "pre_run" operation if not using buffer (i.e.
+    just creating or deleting a persistent burst buffer).
+ -- Fix slurm.spec file support for BlueGene builds.
+ -- Fix missing TRES read lock in acct_policy_job_runnable_pre_select() code.
+ -- Fix debug2 message printing value using wrong array index in
+    _qos_job_runnable_post_select().
+ -- Prevent job timeout on node power up.
+ -- MYSQL - Fix minor memory leak when querying steps and the sql fails.
+ -- Make it so sacctmgr accepts column headers like MaxTRESPU and not MaxTRESP.
+ -- Only look at SLURM_STEP_KILLED_MSG_NODE_ID on startup, to avoid race
+    condition later when looking at a step's env.
+ -- Make backfill scheduler behave like regular scheduler in respect to
+    'assoc_limit_stop'.
+ -- Allow a lower version client command to talk to a higher version controller
+    using the multi-cluster options (e.g. squeue -M<cluster>).
+ -- slurmctld/agent race condition fix: Prevent job launch while PrologSlurmctld
+    daemon is running or node boot in progress.
+ -- MYSQL - Fix a few other minor memory leaks when uncommon failures occur.
+ -- burst_buffer/cray - Fix race condition that could cause multiple batch job
+    launch requests resulting in drained nodes.
+ -- Correct logic to purge old reservations.
+ -- Fix DBD cache restore from previous versions.
+ -- Fix to logic for getting expected start time of existing job ID with
+    explicit begin time that is in the past.
+ -- Clear job's reason of "BeginTime" in a more timely fashion and/or prevent
+    it from being stuck in a PENDING state.
+ -- Make sure acct policy limits imposed on a job are correct after requeue.
+
 * Changes in Slurm 16.05.8
 ==========================
  -- Remove StoragePass from being printed out in the slurmdbd log at debug2
diff -Nru slurm-llnl-16.05.8/slurm.spec slurm-llnl-16.05.9/slurm.spec
--- slurm-llnl-16.05.8/slurm.spec	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/slurm.spec	2017-01-31 20:56:34.000000000 +0100
@@ -564,7 +564,6 @@
 rm -f $RPM_BUILD_ROOT/%{_libdir}/slurm/auth_none.so
 %endif
 %if ! %{slurm_with bluegene}
-rm -f $RPM_BUILD_ROOT/%{_libdir}/slurm/job_submit_cnode.so
 rm -f $RPM_BUILD_ROOT/%{_libdir}/slurm/libsched_if.so
 rm -f $RPM_BUILD_ROOT/%{_libdir}/slurm/libsched_if64.so
 rm -f $RPM_BUILD_ROOT/%{_libdir}/slurm/runjob_plugin.so
@@ -877,7 +876,6 @@
 %{_sbindir}/slurm_epilog
 %{_sbindir}/slurm_prolog
 %{_sbindir}/sfree
-%{_libdir}/slurm/job_submit_cnode.so
 %config %{_sysconfdir}/bluegene.conf.example
 %endif
 #############################################################################
diff -Nru slurm-llnl-16.05.8/src/common/slurmdbd_defs.c slurm-llnl-16.05.9/src/common/slurmdbd_defs.c
--- slurm-llnl-16.05.8/src/common/slurmdbd_defs.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/common/slurmdbd_defs.c	2017-01-31 20:56:34.000000000 +0100
@@ -2348,21 +2348,16 @@
 		   need to set it back to 0 */
 		set_buf_offset(buffer, 0);
 		safe_unpackstr_xmalloc(&ver_str, &ver_str_len, buffer);
-		if (remaining_buf(buffer))
-			goto unpack_error;
 		debug3("Version string in dbd_state header is %s", ver_str);
+	unpack_error:
 		free_buf(buffer);
 		buffer = NULL;
-	unpack_error:
 		if (ver_str) {
-			char curr_ver_str[10];
-			snprintf(curr_ver_str, sizeof(curr_ver_str),
-				 "VER%d", SLURM_PROTOCOL_VERSION);
-			if (!xstrcmp(ver_str, curr_ver_str))
-				rpc_version = SLURM_PROTOCOL_VERSION;
+			/* get the version after VER */
+			rpc_version = slurm_atoul(ver_str + 3);
+			xfree(ver_str);
 		}
 
-		xfree(ver_str);
 		while (1) {
 			/* If the buffer was not the VER%d string it
 			   was an actual message so we don't want to
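
For context on the dbd_state fix above: the old code compared the "VER%d"
header against only the current protocol version, so a state file written by
an older slurmdbd was rejected. The new code parses whatever number follows
the "VER" prefix, which is what makes the DBD cache restorable across
versions. A minimal standalone sketch of that parsing (not the actual Slurm
code; slurm_atoul is assumed to behave like strtoul here):

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	/* dbd_state starts with a "VER<n>" header; skip the 3-byte
	 * prefix and accept any version, not just the current one */
	const char ver_str[] = "VER7936";
	unsigned long rpc_version = strtoul(ver_str + 3, NULL, 10);

	printf("rpc_version = %lu\n", rpc_version);	/* prints 7936 */
	return 0;
}
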
diff -Nru slurm-llnl-16.05.8/src/common/slurmdb_pack.c slurm-llnl-16.05.9/src/common/slurmdb_pack.c
--- slurm-llnl-16.05.8/src/common/slurmdb_pack.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/common/slurmdb_pack.c	2017-01-31 20:56:34.000000000 +0100
@@ -809,6 +809,8 @@
 			goto unpack_error;
 
 		safe_unpack16(&object_ptr->rpc_version, buffer);
+		object_ptr->rpc_version = MIN(SLURM_PROTOCOL_VERSION,
+					      object_ptr->rpc_version);
 		safe_unpackstr_xmalloc(&object_ptr->tres_str,
 				       &uint32_tmp, buffer);
 	} else if (rpc_version >= SLURM_MIN_PROTOCOL_VERSION) {
diff -Nru slurm-llnl-16.05.8/src/plugins/accounting_storage/mysql/as_mysql_convert.c slurm-llnl-16.05.9/src/plugins/accounting_storage/mysql/as_mysql_convert.c
--- slurm-llnl-16.05.8/src/plugins/accounting_storage/mysql/as_mysql_convert.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/plugins/accounting_storage/mysql/as_mysql_convert.c	2017-01-31 20:56:34.000000000 +0100
@@ -804,6 +804,7 @@
 			error("No grp_cpus col name in assoc_table "
 			      "for cluster %s, this should never happen",
 			      cluster_name);
+			mysql_free_result(result);
 			continue;
 		}
 
@@ -899,6 +900,7 @@
 		if (!(row = mysql_fetch_row(result)) || !row[0] || !row[0][0]) {
 			error("No count col name for cluster %s, "
 			      "this should never happen", cluster_name);
+			mysql_free_result(result);
 			continue;
 		}
 
diff -Nru slurm-llnl-16.05.8/src/plugins/accounting_storage/mysql/as_mysql_jobacct_process.c slurm-llnl-16.05.9/src/plugins/accounting_storage/mysql/as_mysql_jobacct_process.c
--- slurm-llnl-16.05.8/src/plugins/accounting_storage/mysql/as_mysql_jobacct_process.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/plugins/accounting_storage/mysql/as_mysql_jobacct_process.c	2017-01-31 20:56:34.000000000 +0100
@@ -502,6 +502,7 @@
 		local_cluster_list = setup_cluster_list_with_inx(
 			mysql_conn, job_cond, (void **)&curr_cluster);
 		if (!local_cluster_list) {
+			mysql_free_result(result);
 			rc = SLURM_ERROR;
 			goto end_it;
 		}
@@ -785,6 +786,7 @@
 			      mysql_conn, query, 0))) {
 			xfree(query);
 			rc = SLURM_ERROR;
+			mysql_free_result(result);
 			goto end_it;
 		}
 		xfree(query);
diff -Nru slurm-llnl-16.05.8/src/plugins/accounting_storage/mysql/as_mysql_resource.c slurm-llnl-16.05.9/src/plugins/accounting_storage/mysql/as_mysql_resource.c
--- slurm-llnl-16.05.8/src/plugins/accounting_storage/mysql/as_mysql_resource.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/plugins/accounting_storage/mysql/as_mysql_resource.c	2017-01-31 20:56:34.000000000 +0100
@@ -312,6 +312,7 @@
 
 	if (!(row = mysql_fetch_row(result))) {
 		error("Resource id %u is not known on the system", res_id);
+		mysql_free_result(result);
 		return percent_used;
 	}
 
@@ -383,6 +384,7 @@
 
 	if (!(row = mysql_fetch_row(result))) {
 		error("Resource id %u is not known on the system", res->id);
+		mysql_free_result(result);
 		return SLURM_ERROR;
 	}
 
@@ -1100,6 +1102,8 @@
 
 	if (!query_clusters && !vals) {
 		xfree(clus_vals);
+		if (result)
+			mysql_free_result(result);
 		errno = SLURM_NO_CHANGE_IN_DATA;
 		error("Nothing to change");
 		return NULL;
diff -Nru slurm-llnl-16.05.8/src/plugins/accounting_storage/mysql/as_mysql_rollup.c slurm-llnl-16.05.9/src/plugins/accounting_storage/mysql/as_mysql_rollup.c
--- slurm-llnl-16.05.8/src/plugins/accounting_storage/mysql/as_mysql_rollup.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/plugins/accounting_storage/mysql/as_mysql_rollup.c	2017-01-31 20:56:34.000000000 +0100
@@ -1199,6 +1199,7 @@
 					      mysql_conn,
 					      query, 0))) {
 					rc = SLURM_ERROR;
+					mysql_free_result(result);
 					goto end_it;
 				}
 				xfree(query);
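
The mysql_free_result() additions in the accounting_storage/mysql hunks above
all fix the same leak pattern: an early continue/return after
mysql_store_result() abandoned the result set. A minimal sketch of the
corrected shape (hypothetical query helper, not the actual Slurm code):

#include <mysql/mysql.h>

int check_first_row(MYSQL *db, const char *query)
{
	MYSQL_RES *result;
	MYSQL_ROW row;

	if (mysql_query(db, query))
		return -1;
	if (!(result = mysql_store_result(db)))
		return -1;
	if (!(row = mysql_fetch_row(result))) {
		mysql_free_result(result);	/* the fix: free on early exit */
		return -1;
	}
	/* ... use row ... */
	mysql_free_result(result);
	return 0;
}
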
diff -Nru slurm-llnl-16.05.8/src/plugins/burst_buffer/cray/burst_buffer_cray.c slurm-llnl-16.05.9/src/plugins/burst_buffer/cray/burst_buffer_cray.c
--- slurm-llnl-16.05.8/src/plugins/burst_buffer/cray/burst_buffer_cray.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/plugins/burst_buffer/cray/burst_buffer_cray.c	2017-01-31 20:56:34.000000000 +0100
@@ -1205,8 +1205,8 @@
 		num_instances = 0;	/* Redundant, but fixes CLANG bug */
 	}
 	sessions = _bb_get_sessions(&num_sessions, &bb_state, timeout);
-	slurm_mutex_lock(&bb_state.bb_mutex);
 	assoc_mgr_lock(&assoc_locks);
+	slurm_mutex_lock(&bb_state.bb_mutex);
 	bb_state.last_load_time = time(NULL);
 	for (i = 0; i < num_sessions; i++) {
 		if (!init_config) {
@@ -1251,8 +1251,8 @@
 		if (bb_alloc->job_id == 0)
 			bb_post_persist_create(NULL, bb_alloc, &bb_state);
 	}
-	assoc_mgr_unlock(&assoc_locks);
 	slurm_mutex_unlock(&bb_state.bb_mutex);
+	assoc_mgr_unlock(&assoc_locks);
 	_bb_free_sessions(sessions, num_sessions);
 	_bb_free_instances(instances, num_instances);
 
@@ -3383,6 +3383,7 @@
 
 	/* Run "paths" function, get DataWarp environment variables */
 	if (_have_dw_cmd_opts(bb_job)) {
+		/* Setup "paths" operation */
 		if (bb_state.bb_config.validate_timeout)
 			timeout = bb_state.bb_config.validate_timeout * 1000;
 		else
@@ -3422,48 +3423,52 @@
 		}
 		xfree(resp_msg);
 		_free_script_argv(script_argv);
-	}
 
-	pre_run_argv = xmalloc(sizeof(char *) * 10);
-	pre_run_argv[0] = xstrdup("dw_wlm_cli");
-	pre_run_argv[1] = xstrdup("--function");
-	pre_run_argv[2] = xstrdup("pre_run");
-	pre_run_argv[3] = xstrdup("--token");
-	xstrfmtcat(pre_run_argv[4], "%u", job_ptr->job_id);
-	pre_run_argv[5] = xstrdup("--job");
-	xstrfmtcat(pre_run_argv[6], "%s/script", job_dir);
-	if (client_nodes_file_nid) {
+		/* Setup "pre_run" operation */
+		pre_run_argv = xmalloc(sizeof(char *) * 10);
+		pre_run_argv[0] = xstrdup("dw_wlm_cli");
+		pre_run_argv[1] = xstrdup("--function");
+		pre_run_argv[2] = xstrdup("pre_run");
+		pre_run_argv[3] = xstrdup("--token");
+		xstrfmtcat(pre_run_argv[4], "%u", job_ptr->job_id);
+		pre_run_argv[5] = xstrdup("--job");
+		xstrfmtcat(pre_run_argv[6], "%s/script", job_dir);
+		if (client_nodes_file_nid) {
 #if defined(HAVE_NATIVE_CRAY)
-		pre_run_argv[7] = xstrdup("--nidlistfile");
+			pre_run_argv[7] = xstrdup("--nidlistfile");
 #else
-		pre_run_argv[7] = xstrdup("--nodehostnamefile");
+			pre_run_argv[7] = xstrdup("--nodehostnamefile");
 #endif
-		pre_run_argv[8] = xstrdup(client_nodes_file_nid);
-	}
-	pre_run_args = xmalloc(sizeof(pre_run_args_t));
-	pre_run_args->args    = pre_run_argv;
-	pre_run_args->job_id  = job_ptr->job_id;
-	pre_run_args->timeout = bb_state.bb_config.other_timeout;
-	pre_run_args->user_id = job_ptr->user_id;
-	if (job_ptr->details)	/* Prevent launch until "pre_run" completes */
-		job_ptr->details->prolog_running++;
-
-	slurm_attr_init(&pre_run_attr);
-	if (pthread_attr_setdetachstate(&pre_run_attr, PTHREAD_CREATE_DETACHED))
-		error("pthread_attr_setdetachstate error %m");
-	while (pthread_create(&pre_run_tid, &pre_run_attr, _start_pre_run,
-			      pre_run_args)) {
-		if (errno != EAGAIN) {
-			error("%s: pthread_create: %m", __func__);
-			_start_pre_run(pre_run_argv);	/* Do in-line */
-			break;
+			pre_run_argv[8] = xstrdup(client_nodes_file_nid);
 		}
-		usleep(100000);
-	}
-	slurm_attr_destroy(&pre_run_attr);
+		pre_run_args = xmalloc(sizeof(pre_run_args_t));
+		pre_run_args->args    = pre_run_argv;
+		pre_run_args->job_id  = job_ptr->job_id;
+		pre_run_args->timeout = bb_state.bb_config.other_timeout;
+		pre_run_args->user_id = job_ptr->user_id;
+		if (job_ptr->details) {	/* Defer launch until completion */
+			job_ptr->details->prolog_running++;
+			job_ptr->job_state |= JOB_CONFIGURING;
+		}
+
+		slurm_attr_init(&pre_run_attr);
+		if (pthread_attr_setdetachstate(&pre_run_attr,
+						PTHREAD_CREATE_DETACHED))
+			error("pthread_attr_setdetachstate error %m");
+		while (pthread_create(&pre_run_tid, &pre_run_attr,
+				      _start_pre_run, pre_run_args)) {
+			if (errno != EAGAIN) {
+				error("%s: pthread_create: %m", __func__);
+				_start_pre_run(pre_run_argv);	/* Do in-line */
+				break;
+			}
+			usleep(100000);
+		}
+		slurm_attr_destroy(&pre_run_attr);
+}
 
-	xfree(job_dir);
 	xfree(client_nodes_file_nid);
+	xfree(job_dir);
 	return rc;
 }
 
@@ -3472,7 +3477,6 @@
 {
 	last_job_update = time(NULL);
 	job_ptr->end_time = last_job_update;
-	job_ptr->job_state = JOB_PENDING | JOB_COMPLETING;
 	if (hold_job)
 		job_ptr->priority = 0;
 	build_cg_bitmap(job_ptr);
@@ -3480,7 +3484,11 @@
 	job_ptr->state_reason = FAIL_BURST_BUFFER_OP;
 	xfree(job_ptr->state_desc);
 	job_ptr->state_desc = xstrdup("Burst buffer pre_run error");
-	job_completion_logger(job_ptr, false);
+
+	job_ptr->job_state  = JOB_REQUEUE;
+	job_completion_logger(job_ptr, true);
+	job_ptr->job_state = JOB_PENDING | JOB_COMPLETING;
+
 	deallocate_nodes(job_ptr, false, false, false);
 }
 
@@ -4066,6 +4074,7 @@
 			error("%s: unable to find job record for job %u",
 			      __func__, create_args->job_id);
 		}
+		assoc_mgr_lock(&assoc_locks);
 		slurm_mutex_lock(&bb_state.bb_mutex);
 		_reset_buf_state(create_args->user_id, create_args->job_id,
 				 create_args->name, BB_STATE_ALLOCATED,
@@ -4074,7 +4083,6 @@
 					     create_args->user_id);
 		bb_alloc->size = create_args->size;
 		bb_alloc->pool = xstrdup(create_args->pool);
-		assoc_mgr_lock(&assoc_locks);
 		if (job_ptr) {
 			bb_alloc->account   = xstrdup(job_ptr->account);
 			if (job_ptr->assoc_ptr) {
@@ -4120,8 +4128,8 @@
 		}
 		(void) bb_post_persist_create(job_ptr, bb_alloc, &bb_state);
 		bb_state.last_update_time = time(NULL);
-		assoc_mgr_unlock(&assoc_locks);
 		slurm_mutex_unlock(&bb_state.bb_mutex);
+		assoc_mgr_unlock(&assoc_locks);
 		unlock_slurmctld(job_write_lock);
 	}
 	xfree(resp_msg);
@@ -4204,6 +4212,9 @@
 		assoc_mgr_lock_t assoc_locks =
 			{ READ_LOCK, NO_LOCK, READ_LOCK, NO_LOCK,
 			  NO_LOCK, NO_LOCK, NO_LOCK };
+		/* assoc_mgr needs locking to call bb_post_persist_delete */
+		if (bb_alloc)
+			assoc_mgr_lock(&assoc_locks);
 		slurm_mutex_lock(&bb_state.bb_mutex);
 		_reset_buf_state(destroy_args->user_id, destroy_args->job_id,
 				 destroy_args->name, BB_STATE_DELETED, 0);
@@ -4216,14 +4227,14 @@
 			bb_limit_rem(bb_alloc->user_id, bb_alloc->size,
 				     bb_alloc->pool, &bb_state);
 
-			assoc_mgr_lock(&assoc_locks);
 			(void) bb_post_persist_delete(bb_alloc, &bb_state);
-			assoc_mgr_unlock(&assoc_locks);
 
 			(void) bb_free_alloc_rec(&bb_state, bb_alloc);
 		}
 		bb_state.last_update_time = time(NULL);
 		slurm_mutex_unlock(&bb_state.bb_mutex);
+		if (bb_alloc)
+			assoc_mgr_unlock(&assoc_locks);
 	}
 	xfree(resp_msg);
 	_free_create_args(destroy_args);
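
The lock reordering in the burst_buffer_cray.c hunks above makes every code
path acquire the assoc_mgr locks before bb_state.bb_mutex and release them in
reverse order. That is the standard fix for the DataWarp accounting deadlock
noted in NEWS: two threads taking the same pair of locks in opposite orders
can each end up holding one lock while waiting forever for the other. A
minimal sketch of the invariant (hypothetical locks, not the actual Slurm
code):

#include <pthread.h>

static pthread_mutex_t lock_a = PTHREAD_MUTEX_INITIALIZER; /* outer (assoc mgr) */
static pthread_mutex_t lock_b = PTHREAD_MUTEX_INITIALIZER; /* inner (bb_mutex)  */

/* Every thread locks A before B and unlocks B before A; if one
 * path did B->A while another did A->B, both could block forever. */
void critical_section(void (*work)(void))
{
	pthread_mutex_lock(&lock_a);
	pthread_mutex_lock(&lock_b);
	work();
	pthread_mutex_unlock(&lock_b);
	pthread_mutex_unlock(&lock_a);
}
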
diff -Nru slurm-llnl-16.05.8/src/plugins/checkpoint/blcr/checkpoint_blcr.c slurm-llnl-16.05.9/src/plugins/checkpoint/blcr/checkpoint_blcr.c
--- slurm-llnl-16.05.8/src/plugins/checkpoint/blcr/checkpoint_blcr.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/plugins/checkpoint/blcr/checkpoint_blcr.c	2017-01-31 20:56:34.000000000 +0100
@@ -422,30 +422,29 @@
 	 */
 
 	/* set LD_PRELOAD for batch script shell */
-	//if (job->batch) {
-		old_env = getenvp(job->env, "LD_PRELOAD");
-		if (old_env) {
-			/* search and replace all libcr_run and libcr_omit
-			 * the old env value is messed up --
-			 * it will be replaced */
-			while ((ptr = strtok_r(old_env, " :", &save_ptr))) {
-				old_env = NULL;
-				if (!ptr)
-					break;
-				if (!xstrncmp(ptr, "libcr_run.so", 12) ||
-				    !xstrncmp(ptr, "libcr_omit.so", 13))
-					continue;
-				xstrcat(new_env, ptr);
-				xstrcat(new_env, ":");
-			}
+	old_env = getenvp(job->env, "LD_PRELOAD");
+	if (old_env) {
+		/* search and replace all libcr_run and libcr_omit
+		 * the old env value is messed up --
+		 * it will be replaced */
+		while ((ptr = strtok_r(old_env, " :", &save_ptr))) {
+			old_env = NULL;
+			if (!ptr)
+				break;
+			if (!xstrncmp(ptr, "libcr_run.so", 12) ||
+			    !xstrncmp(ptr, "libcr_omit.so", 13))
+				continue;
+			xstrcat(new_env, ptr);
+			xstrcat(new_env, ":");
 		}
-		ptr = xstrdup("libcr_run.so");
-		if (new_env)
-			xstrfmtcat(ptr, ":%s", new_env);
-		setenvf(&job->env, "LD_PRELOAD", ptr);
-		xfree(new_env);
-		xfree(ptr);
-		//}
+	}
+	ptr = xstrdup("libcr_run.so");
+	if (new_env)
+		xstrfmtcat(ptr, ":%s", new_env);
+	setenvf(&job->env, "LD_PRELOAD", ptr);
+	xfree(new_env);
+	xfree(ptr);
+
 	return SLURM_SUCCESS;
 }
 
diff -Nru slurm-llnl-16.05.8/src/plugins/sched/backfill/backfill.c slurm-llnl-16.05.9/src/plugins/sched/backfill/backfill.c
--- slurm-llnl-16.05.8/src/plugins/sched/backfill/backfill.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/plugins/sched/backfill/backfill.c	2017-01-31 20:56:34.000000000 +0100
@@ -135,6 +135,7 @@
 static int max_backfill_job_per_user = 0;
 static int max_backfill_jobs_start = 0;
 static bool backfill_continue = false;
+static bool assoc_limit_stop = false;
 static int defer_rpc_cnt = 0;
 static int sched_timeout = SCHED_TIMEOUT;
 static int yield_sleep   = YIELD_SLEEP;
@@ -639,6 +640,13 @@
 		backfill_continue = false;
 	}
 
+	if (sched_params && (strstr(sched_params, "assoc_limit_stop"))) {
+		assoc_limit_stop = true;
+	} else {
+		assoc_limit_stop = false;
+	}
+
+
 	if (sched_params &&
 	    (tmp_ptr = strstr(sched_params, "bf_yield_interval="))) {
 		sched_timeout = atoi(tmp_ptr + 18);
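
The new assoc_limit_stop flag is parsed like the other SchedulerParameters
options: the parameter string is comma-separated, so a flag counts as set
whenever strstr() finds its name. A minimal standalone sketch (not the actual
Slurm code):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static bool has_flag(const char *sched_params, const char *flag)
{
	return sched_params && (strstr(sched_params, flag) != NULL);
}

int main(void)
{
	printf("%d\n", has_flag("bf_continue,assoc_limit_stop",
				"assoc_limit_stop"));	/* prints 1 */
	return 0;
}
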
@@ -1127,7 +1135,8 @@
 		}
 
 		if (!acct_policy_job_runnable_state(job_ptr) &&
-		    !acct_policy_job_runnable_pre_select(job_ptr))
+		    (!assoc_limit_stop ||
+		     !acct_policy_job_runnable_pre_select(job_ptr)))
 			continue;
 
 		job_no_reserve = 0;
@@ -1847,8 +1856,19 @@
 		power_g_job_start(job_ptr);
 		if (job_ptr->batch_flag == 0)
 			srun_allocate(job_ptr->job_id);
-		else if ((job_ptr->details == NULL) ||
-			 (job_ptr->details->prolog_running == 0))
+		else if (
+#ifdef HAVE_BG
+				/* On a bluegene system we need to run the
+				 * prolog while the job is CONFIGURING so this
+				 * can't work off the CONFIGURING flag as done
+				 * elsewhere.
+				 */
+			!job_ptr->details ||
+			!job_ptr->details->prolog_running
+#else
+			!IS_JOB_CONFIGURING(job_ptr)
+#endif
+			)
 			launch_job(job_ptr);
 		slurmctld_diag_stats.backfilled_jobs++;
 		slurmctld_diag_stats.last_backfilled_jobs++;
diff -Nru slurm-llnl-16.05.8/src/plugins/select/cons_res/select_cons_res.c slurm-llnl-16.05.9/src/plugins/select/cons_res/select_cons_res.c
--- slurm-llnl-16.05.8/src/plugins/select/cons_res/select_cons_res.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/plugins/select/cons_res/select_cons_res.c	2017-01-31 20:56:34.000000000 +0100
@@ -544,21 +544,24 @@
 /* sort the rows of a partition from "most allocated" to "least allocated" */
 extern void cr_sort_part_rows(struct part_res_record *p_ptr)
 {
-	uint32_t i, j, a, b;
+	uint32_t i, j, b;
+	uint32_t a[p_ptr->num_rows];
 
 	if (!p_ptr->row)
 		return;
 
 	for (i = 0; i < p_ptr->num_rows; i++) {
 		if (p_ptr->row[i].row_bitmap)
-			a = bit_set_count(p_ptr->row[i].row_bitmap);
+			a[i] = bit_set_count(p_ptr->row[i].row_bitmap);
 		else
-			a = 0;
+			a[i] = 0;
+	}
+	for (i = 0; i < p_ptr->num_rows; i++) {
 		for (j = i+1; j < p_ptr->num_rows; j++) {
-			if (!p_ptr->row[j].row_bitmap)
-				continue;
-			b = bit_set_count(p_ptr->row[j].row_bitmap);
-			if (b > a) {
+			if (a[j] > a[i]) {
+				b = a[j];
+				a[j] = a[i];
+				a[i] = b;
 				_swap_rows(&(p_ptr->row[i]), &(p_ptr->row[j]));
 			}
 		}
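
The cr_sort_part_rows() rewrite above is the cons_res performance fix from
NEWS: the old loop recomputed bit_set_count() inside the O(n^2) inner
comparison, while the new code counts each row's set bits once into an array
and sorts on the cached values, swapping the counts together with the rows so
they stay in sync. A minimal sketch of the same idea (hypothetical row type,
GCC/Clang popcount builtin, not the actual Slurm code):

#include <stdint.h>

struct row { uint64_t bits; };

/* Sort rows from most to least allocated, popcounting each row once */
void sort_rows_desc(struct row *rows, unsigned n)
{
	unsigned a[n];		/* cached per-row set-bit counts (VLA) */

	for (unsigned i = 0; i < n; i++)
		a[i] = (unsigned) __builtin_popcountll(rows[i].bits);

	for (unsigned i = 0; i < n; i++) {
		for (unsigned j = i + 1; j < n; j++) {
			if (a[j] > a[i]) {
				unsigned t = a[j];
				struct row r = rows[j];

				a[j] = a[i];		/* keep the cached */
				a[i] = t;		/* counts in sync  */
				rows[j] = rows[i];
				rows[i] = r;
			}
		}
	}
}
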
@@ -1878,9 +1881,7 @@
 	    ((job_ptr->bit_flags & TEST_NOW_ONLY) == 0)) {
 		int time_window = 30;
 		bool more_jobs = true;
-		bool timed_out = false;
 		DEF_TIMERS;
-
 		list_sort(cr_job_list, _cr_job_list_sort);
 		START_TIMER;
 		job_iterator = list_iterator_create(cr_job_list);
@@ -1908,14 +1909,6 @@
 				last_job_ptr = tmp_job_ptr;
 				_rm_job_from_res(future_part, future_usage,
 						 tmp_job_ptr, 0);
-				if (timed_out) {
-					/* After timeout, remove ALL remaining
-					 * jobs and test if the pending job can
-					 * start, rather than executing the slow
-					 * cr_job_test() operation after
-					 * removing every 200 jobs */
-					continue;
-				}
 				if (rm_job_cnt++ > 200)
 					break;
 				next_job_ptr = list_peek_next(job_iterator);
@@ -1949,12 +1942,9 @@
 				}
 				break;
 			}
-			/* After 1 second of iterating over groups of running
-			 * jobs, simulate the termination of all remaining jobs
-			 * in order to determine if pending job can ever run */
 			END_TIMER;
-			if (DELTA_TIMER >= 1000000)
-				timed_out = true;
+			if (DELTA_TIMER >= 2000000)
+				break;	/* Quit after 2 seconds wall time */
 		}
 		list_iterator_destroy(job_iterator);
 	}
diff -Nru slurm-llnl-16.05.8/src/plugins/task/cgroup/task_cgroup_memory.c slurm-llnl-16.05.9/src/plugins/task/cgroup/task_cgroup_memory.c
--- slurm-llnl-16.05.8/src/plugins/task/cgroup/task_cgroup_memory.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/plugins/task/cgroup/task_cgroup_memory.c	2017-01-31 20:56:34.000000000 +0100
@@ -458,6 +458,7 @@
 		goto error;
 	}
 
+	fstatus = SLURM_SUCCESS;
 error:
 	xcgroup_unlock(&memory_cg);
 	xcgroup_destroy(&memory_cg);
diff -Nru slurm-llnl-16.05.8/src/sacctmgr/common.c slurm-llnl-16.05.9/src/sacctmgr/common.c
--- slurm-llnl-16.05.8/src/sacctmgr/common.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/sacctmgr/common.c	2017-01-31 20:56:34.000000000 +0100
@@ -405,8 +405,10 @@
 		field->name = xstrdup("MaxCPUsPU");
 		field->len = 9;
 		field->print_routine = print_fields_uint;
-	} else if (!strncasecmp("MaxTRESPerJob",
-				object, MAX(command_len, 7))) {
+	} else if (!strncasecmp("MaxTRES",
+				object, MAX(command_len, 7)) ||
+		   !strncasecmp("MaxTRESPerJob",
+				object, MAX(command_len, 11))) {
 		field->type = PRINT_MAXT;
 		field->name = xstrdup("MaxTRES");
 		field->len = 13;
@@ -452,7 +454,9 @@
 		field->len = 13;
 		field->print_routine = sacctmgr_print_tres;
 	} else if (!strncasecmp("MaxTRESPerUser", object,
-				MAX(command_len, 11))) {
+				MAX(command_len, 11)) ||
+		   !strncasecmp("MaxTRESPU", object,
+				MAX(command_len, 9))) {
 		field->type = PRINT_MAXTU;
 		field->name = xstrdup("MaxTRESPU");
 		field->len = 13;
@@ -473,9 +477,9 @@
 		field->len = 9;
 		field->print_routine = print_fields_uint;
 	} else if (!strncasecmp("MaxJobsPerUser", object,
-				MAX(command_len, 8)) ||
+				MAX(command_len, 11)) ||
 		   !strncasecmp("MaxJobsPU", object,
-				MAX(command_len, 8))) {
+				MAX(command_len, 9))) {
 		field->type = PRINT_MAXJ; /* used same as MaxJobs */
 		field->name = xstrdup("MaxJobsPU");
 		field->len = 9;
diff -Nru slurm-llnl-16.05.8/src/sbcast/opts.c slurm-llnl-16.05.9/src/sbcast/opts.c
--- slurm-llnl-16.05.8/src/sbcast/opts.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/sbcast/opts.c	2017-01-31 20:56:34.000000000 +0100
@@ -94,7 +94,7 @@
 		{NULL,        0,                 0, 0}
 	};
 
-	if (getenv("SBCAST_COMPRESS"))
+	if ((env_val = getenv("SBCAST_COMPRESS")))
 		params.compress = parse_compress_type(env_val);
 	if ( ( env_val = getenv("SBCAST_FANOUT") ) )
 		params.fanout = atoi(env_val);
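
The one-line sbcast fix above is a classic getenv() slip: the old code tested
getenv("SBCAST_COMPRESS") for presence but then parsed env_val, which had
never been assigned. A standalone illustration of the buggy versus fixed
pattern (not the actual Slurm code):

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	char *env_val = NULL;

	if (getenv("SBCAST_COMPRESS"))		/* buggy: result discarded, */
		printf("%p\n", (void *) env_val);	/* env_val still NULL */

	if ((env_val = getenv("SBCAST_COMPRESS")))	/* fixed: capture it */
		printf("%s\n", env_val);

	return 0;
}
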
diff -Nru slurm-llnl-16.05.8/src/slurmctld/acct_policy.c slurm-llnl-16.05.9/src/slurmctld/acct_policy.c
--- slurm-llnl-16.05.8/src/slurmctld/acct_policy.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/slurmctld/acct_policy.c	2017-01-31 20:56:34.000000000 +0100
@@ -1765,7 +1765,7 @@
 		       qos_ptr->name,
 		       assoc_mgr_tres_name_array[tres_pos],
 		       qos_ptr->grp_tres_mins_ctld[tres_pos],
-		       tres_usage_mins[i]);
+		       tres_usage_mins[tres_pos]);
 		rc = false;
 		goto end_it;
 		break;
@@ -2741,7 +2741,7 @@
 			 * parent or not
 			 */
 	assoc_mgr_lock_t locks = { READ_LOCK, NO_LOCK, READ_LOCK, NO_LOCK,
-				   NO_LOCK, NO_LOCK, NO_LOCK };
+				   READ_LOCK, NO_LOCK, NO_LOCK };
 
 	/* check to see if we are enforcing associations */
 	if (!accounting_enforce)
diff -Nru slurm-llnl-16.05.8/src/slurmctld/agent.c slurm-llnl-16.05.9/src/slurmctld/agent.c
--- slurm-llnl-16.05.8/src/slurmctld/agent.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/slurmctld/agent.c	2017-01-31 20:56:34.000000000 +0100
@@ -174,7 +174,12 @@
 	char *message;
 } mail_info_t;
 
-static void _sig_handler(int dummy);
+typedef struct retry_args {
+	bool mail_too;			/* Send pending email too */
+	int min_wait;			/* Time to wait between retries */
+} retry_args_t;
+
+static void *_agent_retry(void *arg);
 static int  _batch_launch_defer(queued_request_t *queued_req_ptr);
 static inline int _comm_err(char *node_name, slurm_msg_type_t msg_type);
 static void _list_delete_retry(void *retry_entry);
@@ -185,8 +190,9 @@
 		int no_resp_cnt, int retry_cnt);
 static void _purge_agent_args(agent_arg_t *agent_arg_ptr);
 static void _queue_agent_retry(agent_info_t * agent_info_ptr, int count);
-static int _setup_requeue(agent_arg_t *agent_arg_ptr, thd_t *thread_ptr,
-			  int *count, int *spot);
+static int  _setup_requeue(agent_arg_t *agent_arg_ptr, thd_t *thread_ptr,
+			   int *count, int *spot);
+static void _sig_handler(int dummy);
 static void _spawn_retry_agent(agent_arg_t * agent_arg_ptr);
 static void *_thread_per_group_rpc(void *args);
 static int   _valid_agent_arg(agent_arg_t *agent_arg_ptr);
@@ -1261,17 +1267,41 @@
 }
 
 /*
- * agent_retry - Agent for retrying pending RPCs. One pending request is
+ * agent_retry - Spawn agent for retrying pending RPCs. One pending request is
  *	issued if it has been pending for at least min_wait seconds
  * IN min_wait - Minimum wait time between re-issue of a pending RPC
  * IN mail_too - Send pending email too, note this performed using a
  *	fork/waitpid, so it can take longer than just creating a pthread
  *	to send RPCs
- * RET count of queued requests remaining
  */
-extern int agent_retry (int min_wait, bool mail_too)
+extern void agent_retry(int min_wait, bool mail_too)
 {
-	int list_size = 0, rc;
+	pthread_attr_t thread_attr;
+	pthread_t thread_id = (pthread_t) 0;
+	retry_args_t *retry_args_ptr;
+
+	retry_args_ptr = xmalloc(sizeof(struct retry_args));
+	retry_args_ptr->mail_too = mail_too;
+	retry_args_ptr->min_wait = min_wait;
+
+	slurm_attr_init(&thread_attr);
+	if (pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_DETACHED))
+		error("pthread_attr_setdetachstate error %m");
+	if (pthread_create(&thread_id, &thread_attr, _agent_retry,
+			   (void *) retry_args_ptr)) {
+		error("pthread_create error %m");
+		xfree(retry_args_ptr);
+	}
+	slurm_attr_destroy(&thread_attr);
+}
+
+/* Do the work requested by agent_retry (retry pending RPCs).
+ * This is a separate thread so the job records can be locked */
+static void *_agent_retry(void *arg)
+{
+	retry_args_t *retry_args_ptr = (retry_args_t *) arg;
+	bool mail_too;
+	int min_wait, rc;
 	time_t now = time(NULL);
 	queued_request_t *queued_req_ptr = NULL;
 	agent_arg_t *agent_arg_ptr = NULL;
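
The agent.c change above converts agent_retry() into a thin wrapper: it
heap-allocates the arguments and hands them to a detached worker thread, so
the worker can take the slurmctld job write lock without stalling the caller
(which is also why the old queue-length return value goes away). The
hand-off pattern, as a minimal sketch (hypothetical worker, not the actual
Slurm code):

#include <pthread.h>
#include <stdlib.h>

struct retry_args { int min_wait; int mail_too; };

static void *worker(void *arg)
{
	struct retry_args *a = arg;
	/* ... lock job records, retry RPCs pending > a->min_wait ... */
	free(a);		/* the worker owns and frees the args */
	return NULL;
}

void spawn_retry(int min_wait, int mail_too)
{
	struct retry_args *a = malloc(sizeof(*a));
	pthread_attr_t attr;
	pthread_t tid;

	if (!a)
		return;
	a->min_wait = min_wait;
	a->mail_too = mail_too;
	pthread_attr_init(&attr);
	pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
	if (pthread_create(&tid, &attr, worker, a))
		free(a);	/* spawn failed: free here instead */
	pthread_attr_destroy(&attr);
}
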
@@ -1279,17 +1309,26 @@
 	pthread_t thread_mail = 0;
 	pthread_attr_t attr_mail;
 	mail_info_t *mi = NULL;
+	/* Write lock on jobs */
+	slurmctld_lock_t job_write_lock =
+		{ NO_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK };
+
+	mail_too = retry_args_ptr->mail_too;
+	min_wait = retry_args_ptr->min_wait;
+	xfree(arg);
 
+	lock_slurmctld(job_write_lock);
 	slurm_mutex_lock(&retry_mutex);
 	if (retry_list) {
 		static time_t last_msg_time = (time_t) 0;
-		uint32_t msg_type[5] = {0, 0, 0, 0, 0}, i = 0;
+		uint32_t msg_type[5] = {0, 0, 0, 0, 0};
+		int i = 0, list_size;
 		list_size = list_count(retry_list);
 		if ((list_size > 100) &&
 		    (difftime(now, last_msg_time) > 300)) {
 			/* Note sizable backlog of work */
 			info("slurmctld: agent retry_list size is %d",
-				list_size);
+			     list_size);
 			retry_iter = list_iterator_create(retry_list);
 			while ((queued_req_ptr = (queued_request_t *)
 					list_next(retry_iter))) {
@@ -1311,13 +1350,13 @@
 		/* too much work already */
 		slurm_mutex_unlock(&agent_cnt_mutex);
 		slurm_mutex_unlock(&retry_mutex);
-		return list_size;
+		unlock_slurmctld(job_write_lock);
+		return NULL;
 	}
 	slurm_mutex_unlock(&agent_cnt_mutex);
 
 	if (retry_list) {
 		/* first try to find a new (never tried) record */
-
 		retry_iter = list_iterator_create(retry_list);
 		while ((queued_req_ptr = (queued_request_t *)
 				list_next(retry_iter))) {
@@ -1327,14 +1366,12 @@
 						  agent_arg_ptr);
 				xfree(queued_req_ptr);
 				list_remove(retry_iter);
-				list_size--;
 				continue;
 			}
 			if (rc > 0)
 				continue;
  			if (queued_req_ptr->last_attempt == 0) {
 				list_remove(retry_iter);
-				list_size--;
 				break;
 			}
 		}
@@ -1356,7 +1393,6 @@
 						  agent_arg_ptr);
 				xfree(queued_req_ptr);
 				list_remove(retry_iter);
-				list_size--;
 				continue;
 			}
 			if (rc > 0)
@@ -1364,13 +1400,13 @@
 			age = difftime(now, queued_req_ptr->last_attempt);
 			if (age > min_wait) {
 				list_remove(retry_iter);
-				list_size--;
 				break;
 			}
 		}
 		list_iterator_destroy(retry_iter);
 	}
 	slurm_mutex_unlock(&retry_mutex);
+	unlock_slurmctld(job_write_lock);
 
 	if (queued_req_ptr) {
 		agent_arg_ptr = queued_req_ptr->agent_arg_ptr;
@@ -1406,7 +1442,7 @@
 		slurm_mutex_unlock(&agent_cnt_mutex);
 	}
 
-	return list_size;
+	return NULL;
 }
 
 /*
@@ -1823,7 +1859,7 @@
 	agent_arg_t *agent_arg_ptr;
 	batch_job_launch_msg_t *launch_msg_ptr;
 	time_t now = time(NULL);
-	struct job_record  *job_ptr;
+	struct job_record *job_ptr;
 	int nodes_ready = 0, tmp = 0;
 
 	agent_arg_ptr = queued_req_ptr->agent_arg_ptr;
@@ -1845,6 +1881,9 @@
 		return -1;	/* job cancelled while waiting */
 	}
 
+	if (job_ptr->details && job_ptr->details->prolog_running)
+		return 1;
+
 	if (job_ptr->wait_all_nodes) {
 		(void) job_node_ready(launch_msg_ptr->job_id, &tmp);
 		if (tmp == (READY_JOB_STATE | READY_NODE_STATE)) {
@@ -1853,9 +1892,6 @@
 			    !xstrcmp(launch_msg_ptr->alias_list, "TBD")) {
 				/* Update launch RPC with correct node
 				 * aliases */
-				struct job_record *job_ptr;
-				job_ptr = find_job_record(launch_msg_ptr->
-							  job_id);
 				xfree(launch_msg_ptr->alias_list);
 				launch_msg_ptr->alias_list = xstrdup(job_ptr->
 								     alias_list);
@@ -1887,7 +1923,8 @@
 	}
 
 	if (nodes_ready) {
-		job_config_fini(job_ptr);
+		if (IS_JOB_CONFIGURING(job_ptr))
+			job_config_fini(job_ptr);
 		queued_req_ptr->last_attempt = (time_t) 0;
 		return 0;
 	}
diff -Nru slurm-llnl-16.05.8/src/slurmctld/agent.h slurm-llnl-16.05.9/src/slurmctld/agent.h
--- slurm-llnl-16.05.8/src/slurmctld/agent.h	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/slurmctld/agent.h	2017-01-31 20:56:34.000000000 +0100
@@ -85,11 +85,10 @@
  *	issued if it has been pending for at least min_wait seconds
  * IN min_wait - Minimum wait time between re-issue of a pending RPC
  * IN mail_too - Send pending email too, note this performed using a
- *		fork/waitpid, so it can take longer than just creating
- *		a pthread to send RPCs
- * RET count of queued requests remaining
+ *	fork/waitpid, so it can take longer than just creating a pthread
+ *	to send RPCs
  */
-extern int agent_retry (int min_wait, bool mail_too);
+extern void agent_retry(int min_wait, bool mail_too);
 
 /* agent_purge - purge all pending RPC requests */
 extern void agent_purge (void);
diff -Nru slurm-llnl-16.05.8/src/slurmctld/job_mgr.c slurm-llnl-16.05.9/src/slurmctld/job_mgr.c
--- slurm-llnl-16.05.8/src/slurmctld/job_mgr.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/slurmctld/job_mgr.c	2017-01-31 20:56:34.000000000 +0100
@@ -7470,6 +7470,7 @@
 {
 	time_t now = time(NULL);
 
+	last_job_update = now;
 	job_ptr->job_state &= (~JOB_CONFIGURING);
 	job_ptr->tot_sus_time = difftime(now, job_ptr->start_time);
 	if ((job_ptr->time_limit != INFINITE) && (job_ptr->tot_sus_time != 0)) {
@@ -7486,9 +7487,20 @@
 	if (bit_overlap(job_ptr->node_bitmap, power_node_bitmap))
 		return false;
 
-	if (job_ptr->wait_all_nodes && 
-	    ((select_g_job_ready(job_ptr) & READY_NODE_STATE) == 0))
-		return false;
+	if (job_ptr->wait_all_nodes) {
+		/* Make sure all nodes ready to start job */
+		if ((select_g_job_ready(job_ptr) & READY_NODE_STATE) == 0)
+			return false;
+	} else if (job_ptr->batch_flag) {
+		/* Make sure the first node is ready to start a batch job */
+		int i_first = bit_ffs(job_ptr->node_bitmap);
+		struct node_record *node_ptr = node_record_table_ptr + i_first;
+		if ((i_first != -1) &&
+		    (IS_NODE_POWER_SAVE(node_ptr) ||
+		     IS_NODE_POWER_UP(node_ptr))) {
+			return false;
+		}
+	}
 
 	return true;
 }
@@ -12846,6 +12858,7 @@
 	if (job_ptr->alias_list && !xstrcmp(job_ptr->alias_list, "TBD") &&
 	    (prolog == 0) && job_ptr->node_bitmap &&
 	    (bit_overlap(power_node_bitmap, job_ptr->node_bitmap) == 0)) {
+		last_job_update = time(NULL);
 		job_ptr->job_state &= (~JOB_CONFIGURING);
 		set_job_alias_list(job_ptr);
 	}
@@ -14373,6 +14386,8 @@
 	 * to add it again. */
 	acct_policy_add_job_submit(job_ptr);
 
+	acct_policy_update_pending_job(job_ptr);
+
 	if (state & JOB_SPECIAL_EXIT) {
 		job_ptr->job_state |= JOB_SPECIAL_EXIT;
 		job_ptr->state_reason = WAIT_HELD_USER;
diff -Nru slurm-llnl-16.05.8/src/slurmctld/job_scheduler.c slurm-llnl-16.05.9/src/slurmctld/job_scheduler.c
--- slurm-llnl-16.05.8/src/slurmctld/job_scheduler.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/slurmctld/job_scheduler.c	2017-01-31 20:56:34.000000000 +0100
@@ -574,6 +574,7 @@
 	ListIterator job_iterator;
 	slurmctld_lock_t job_write_lock =
 		{ READ_LOCK, WRITE_LOCK, WRITE_LOCK, READ_LOCK };
+	time_t now = time(NULL);
 #ifdef HAVE_BG
 	static uint16_t cpus_per_node = 0;
 	if (!cpus_per_node)
@@ -591,7 +592,8 @@
 			continue;
 		if (part_ptr == NULL)
 			continue;
-		if ((job_ptr->details == NULL) || job_ptr->details->begin_time)
+		if ((job_ptr->details == NULL) ||
+		    (job_ptr->details->begin_time > now))
 			continue;
 		if ((part_ptr->state_up & PARTITION_SCHED) == 0)
 			continue;
@@ -863,8 +865,20 @@
 			info("sched: Allocate JobId=%u Partition=%s NodeList=%s #CPUs=%u",
 			     job_ptr->job_id, job_ptr->part_ptr->name,
 			     job_ptr->nodes, job_ptr->total_cpus);
-			if ((job_ptr->details->prolog_running == 0) &&
-			    ((job_ptr->bit_flags & NODE_REBOOT) == 0)) {
+
+			if (
+#ifdef HAVE_BG
+				/* On a bluegene system we need to run the
+				 * prolog while the job is CONFIGURING so this
+				 * can't work off the CONFIGURING flag as done
+				 * elsewhere.
+				 */
+				!job_ptr->details->prolog_running &&
+				!(job_ptr->bit_flags & NODE_REBOOT)
+#else
+				!IS_JOB_CONFIGURING(job_ptr)
+#endif
+				) {
 				launch_msg = build_launch_job_msg(job_ptr,
 							msg->protocol_version);
 			}
@@ -1842,10 +1856,20 @@
 #endif
 			if (job_ptr->batch_flag == 0)
 				srun_allocate(job_ptr->job_id);
-			else if ((job_ptr->details->prolog_running == 0) &&
-			         ((job_ptr->bit_flags & NODE_REBOOT) == 0)) {
+			else if (
+#ifdef HAVE_BG
+				/* On a bluegene system we need to run the
+				 * prolog while the job is CONFIGURING so this
+				 * can't work off the CONFIGURING flag as done
+				 * elsewhere.
+				 */
+				!job_ptr->details->prolog_running &&
+				!(job_ptr->bit_flags & NODE_REBOOT)
+#else
+				!IS_JOB_CONFIGURING(job_ptr)
+#endif
+				)
 				launch_job(job_ptr);
-			}
 			rebuild_job_part_list(job_ptr);
 			job_cnt++;
 			if (is_job_array_head &&
@@ -3181,7 +3205,8 @@
 	}
 
 	/* Enforce reservation: access control, time and nodes */
-	if (job_ptr->details->begin_time)
+	if (job_ptr->details->begin_time &&
+	    (job_ptr->details->begin_time > now))
 		start_res = job_ptr->details->begin_time;
 	else
 		start_res = now;
@@ -3753,10 +3778,10 @@
 		return errno;
 	}
 
-	if (job_ptr->details)
+	if (job_ptr->details) {
 		job_ptr->details->prolog_running++;
-
-	job_ptr->job_state |= JOB_CONFIGURING;
+		job_ptr->job_state |= JOB_CONFIGURING;
+	}
 
 	slurm_attr_init(&thread_attr_prolog);
 	pthread_attr_setdetachstate(&thread_attr_prolog,
diff -Nru slurm-llnl-16.05.8/src/slurmctld/node_mgr.c slurm-llnl-16.05.9/src/slurmctld/node_mgr.c
--- slurm-llnl-16.05.8/src/slurmctld/node_mgr.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/slurmctld/node_mgr.c	2017-01-31 20:56:34.000000000 +0100
@@ -3346,9 +3346,11 @@
 	char *host_str = NULL;
 	hostlist_t no_resp_hostlist = NULL;
 
-	for (i=0; i<node_record_count; i++) {
+	for (i = 0; i < node_record_count; i++) {
 		node_ptr = &node_record_table_ptr[i];
-		if (!node_ptr->not_responding)
+		if (!node_ptr->not_responding ||
+		    IS_NODE_POWER_SAVE(node_ptr) ||
+		    IS_NODE_POWER_UP(node_ptr))
 			continue;
 		if (no_resp_hostlist) {
 			(void) hostlist_push_host(no_resp_hostlist,
diff -Nru slurm-llnl-16.05.8/src/slurmctld/proc_req.c slurm-llnl-16.05.9/src/slurmctld/proc_req.c
--- slurm-llnl-16.05.8/src/slurmctld/proc_req.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/slurmctld/proc_req.c	2017-01-31 20:56:34.000000000 +0100
@@ -3381,8 +3381,20 @@
 				_throttle_fini(&active_rpc_cnt);
 				goto fini;
 			}
-			if (job_ptr->details &&
-			    job_ptr->details->prolog_running) {
+
+			if (
+#ifdef HAVE_BG
+				/* On a bluegene system we need to run the
+				 * prolog while the job is CONFIGURING so this
+				 * can't work off the CONFIGURING flag as done
+				 * elsewhere.
+				 */
+				job_ptr->details &&
+				job_ptr->details->prolog_running
+#else
+				IS_JOB_CONFIGURING(job_ptr)
+#endif
+				) {
 				slurm_send_rc_msg(msg, EAGAIN);
 				unlock_slurmctld(job_write_lock);
 				_throttle_fini(&active_rpc_cnt);
diff -Nru slurm-llnl-16.05.8/src/slurmctld/reservation.c slurm-llnl-16.05.9/src/slurmctld/reservation.c
--- slurm-llnl-16.05.8/src/slurmctld/reservation.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/slurmctld/reservation.c	2017-01-31 20:56:34.000000000 +0100
@@ -5400,7 +5400,6 @@
 		}
 		_advance_resv_time(resv_ptr);
 		if ((resv_ptr->job_run_cnt    == 0) &&
-		    (resv_ptr->flags_set_node == false) &&
 		    ((resv_ptr->flags & RESERVE_FLAG_DAILY ) == 0) &&
 		    ((resv_ptr->flags & RESERVE_FLAG_WEEKLY) == 0)) {
 			if (resv_ptr->job_pend_cnt) {
diff -Nru slurm-llnl-16.05.8/src/slurmctld/step_mgr.c slurm-llnl-16.05.9/src/slurmctld/step_mgr.c
--- slurm-llnl-16.05.8/src/slurmctld/step_mgr.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/slurmctld/step_mgr.c	2017-01-31 20:56:34.000000000 +0100
@@ -981,11 +981,12 @@
 				return NULL;
 			}
 		}
-		if (job_ptr->details
-		    && job_ptr->details->prolog_running == 0) {
+		if (IS_JOB_CONFIGURING(job_ptr)) {
 			info("%s: Configuration for job %u is complete",
 			      __func__, job_ptr->job_id);
 			job_config_fini(job_ptr);
+			if (job_ptr->bit_flags & NODE_REBOOT)
+				job_validate_mem(job_ptr);
 		}
 	}
 
diff -Nru slurm-llnl-16.05.8/src/slurmd/common/xcgroup.c slurm-llnl-16.05.9/src/slurmd/common/xcgroup.c
--- slurm-llnl-16.05.8/src/slurmd/common/xcgroup.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/slurmd/common/xcgroup.c	2017-01-31 20:56:34.000000000 +0100
@@ -449,7 +449,6 @@
 	char* file_path;
 	uid_t uid;
 	gid_t gid;
-	int create_only;
 	uint32_t notify;
 
 	/* init variables based on input cgroup */
@@ -457,7 +456,6 @@
 	file_path = cg->path;
 	uid = cg->uid;
 	gid = cg->gid;
-	create_only = 0;
 	notify = cg->notify;
 
 	/* save current mask and apply working one */
@@ -465,20 +463,23 @@
 	omask = umask(cmask);
 
 	/* build cgroup */
- 	if (mkdir(file_path, 0755)) {
-		if (create_only || errno != EEXIST) {
-			debug2("%s: unable to create cgroup '%s' : %m",
-			       __func__, file_path);
+	if (mkdir(file_path, 0755)) {
+		if (errno != EEXIST) {
+			error("%s: unable to create cgroup '%s' : %m",
+			      __func__, file_path);
 			umask(omask);
 			return fstatus;
+		} else {
+			debug("%s: cgroup '%s' already exists",
+			      __func__, file_path);
 		}
 	}
 	umask(omask);
 
 	/* change cgroup ownership as requested */
 	if (chown(file_path, uid, gid)) {
-		debug2("%s: unable to chown %d:%d cgroup '%s' : %m",
-		       __func__, uid, gid, file_path);
+		error("%s: unable to chown %d:%d cgroup '%s' : %m",
+		      __func__, uid, gid, file_path);
 		return fstatus;
 	}
 
diff -Nru slurm-llnl-16.05.8/src/slurmd/slurmstepd/req.c slurm-llnl-16.05.9/src/slurmd/slurmstepd/req.c
--- slurm-llnl-16.05.8/src/slurmd/slurmstepd/req.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/slurmd/slurmstepd/req.c	2017-01-31 20:56:34.000000000 +0100
@@ -128,6 +128,7 @@
 static pthread_mutex_t message_lock = PTHREAD_MUTEX_INITIALIZER;
 static pthread_cond_t message_cond = PTHREAD_COND_INITIALIZER;
 static int message_connections;
+static int msg_target_node_id = 0;
 
 /*
  *  Returns true if "uid" is a "slurm authorized user" - i.e. uid == 0
@@ -739,8 +740,6 @@
 	int errnum = 0;
 	int sig;
 	static int msg_sent = 0;
-	char *ptr = NULL;
-	int target_node_id = 0;
 	stepd_step_task_info_t *task;
 	uint32_t i;
 	uint32_t flag;
@@ -792,11 +791,8 @@
 		}
 	}
 
-	ptr = getenvp(job->env, "SLURM_STEP_KILLED_MSG_NODE_ID");
-	if (ptr)
-		target_node_id = atoi(ptr);
 	if ((job->stepid != SLURM_EXTERN_CONT) &&
-	    (job->nodeid == target_node_id) && (msg_sent == 0) &&
+	    (job->nodeid == msg_target_node_id) && (msg_sent == 0) &&
 	    (job->state < SLURMSTEPD_STEP_ENDING)) {
 		time_t now = time(NULL);
 		char entity[24], time_str[24];
@@ -1818,3 +1814,10 @@
 		}
 	}
 }
+
+extern void set_msg_node_id(stepd_step_rec_t *job)
+{
+	char *ptr = getenvp(job->env, "SLURM_STEP_KILLED_MSG_NODE_ID");
+	if (ptr)
+		msg_target_node_id = atoi(ptr);
+}
diff -Nru slurm-llnl-16.05.8/src/slurmd/slurmstepd/req.h slurm-llnl-16.05.9/src/slurmd/slurmstepd/req.h
--- slurm-llnl-16.05.8/src/slurmd/slurmstepd/req.h	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/slurmd/slurmstepd/req.h	2017-01-31 20:56:34.000000000 +0100
@@ -46,4 +46,6 @@
 /* Delay until a job is resumed */
 extern void wait_for_resumed(uint16_t msg_type);
 
+extern void set_msg_node_id(stepd_step_rec_t *job);
+
 #endif /* _STEP_REQ_H */
diff -Nru slurm-llnl-16.05.8/src/slurmd/slurmstepd/slurmstepd.c slurm-llnl-16.05.9/src/slurmd/slurmstepd/slurmstepd.c
--- slurm-llnl-16.05.8/src/slurmd/slurmstepd/slurmstepd.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/slurmd/slurmstepd/slurmstepd.c	2017-01-31 20:56:34.000000000 +0100
@@ -581,6 +581,8 @@
 	env_array_overwrite(&job->env,"SLURM_TOPOLOGY_ADDR_PATTERN",
 			    conf->node_topo_pattern);
 
+	set_msg_node_id(job);
+
 	return job;
 }
 
diff -Nru slurm-llnl-16.05.8/src/srun/libsrun/allocate.c slurm-llnl-16.05.9/src/srun/libsrun/allocate.c
--- slurm-llnl-16.05.8/src/srun/libsrun/allocate.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/srun/libsrun/allocate.c	2017-01-31 20:56:34.000000000 +0100
@@ -877,6 +877,7 @@
 		j->power_flags = opt.power_flags;
 	if (opt.mcs_label)
 		j->mcs_label = opt.mcs_label;
+	j->wait_all_nodes = 1;
 
 	return j;
 }
diff -Nru slurm-llnl-16.05.8/testsuite/expect/test10.13 slurm-llnl-16.05.9/testsuite/expect/test10.13
--- slurm-llnl-16.05.8/testsuite/expect/test10.13	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/testsuite/expect/test10.13	2017-01-31 20:56:34.000000000 +0100
@@ -44,6 +44,11 @@
 
 print_header $test_id
 
+if { [test_bluegene] == 0 } {
+	send_user "\nWARNING: This test is only compatible with bluegene systems\n"
+	exit 0
+}
+
 if {[file exists $smap] == 0} {
 	send_user "\nWARNING: smap not installed\n"
 	exit 0
diff -Nru slurm-llnl-16.05.8/testsuite/expect/test10.5 slurm-llnl-16.05.9/testsuite/expect/test10.5
--- slurm-llnl-16.05.8/testsuite/expect/test10.5	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/testsuite/expect/test10.5	2017-01-31 20:56:34.000000000 +0100
@@ -42,6 +42,11 @@
 
 print_header $test_id
 
+if { [test_bluegene] == 0 } {
+	send_user "\nWARNING: This test is only compatible with bluegene systems\n"
+	exit 0
+}
+
 if {[file exists $smap] == 0} {
 	send_user "\nWARNING: smap not installed\n"
 	exit 0
diff -Nru slurm-llnl-16.05.8/testsuite/expect/test1.14 slurm-llnl-16.05.9/testsuite/expect/test1.14
--- slurm-llnl-16.05.8/testsuite/expect/test1.14	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/testsuite/expect/test1.14	2017-01-31 20:56:34.000000000 +0100
@@ -39,7 +39,6 @@
 set file_out        "test$test_id.output"
 set file_out2       "test$test_id.output2"
 set job_id           0
-set sleep_secs       10
 
 
 print_header $test_id
@@ -64,10 +63,15 @@
 exec $bin_rm -f $file_in $file_in2 $file_out $file_out2
 make_bash_script $file_in "
   echo tasks_per_node=\$SLURM_TASKS_PER_NODE
+  if \[ \$SLURM_TASKS_PER_NODE -gt 32 \]; then
+    sleep_secs=45
+  else
+    sleep_secs=10
+  fi
   inx=0
   while \[ \$inx -lt \$SLURM_TASKS_PER_NODE \]
   do
-    $srun --exclusive -n1 $bin_sleep $sleep_secs &
+    $srun --exclusive -n1 $bin_sleep \$sleep_secs &
     inx=\$((inx+1))
   done
   $bin_sleep 4
@@ -81,7 +85,7 @@
 #
 # Spawn a job via sbatch
 #
-spawn $sbatch -N1 -t1 --gres=craynetwork:0 --output=$file_out $file_in
+spawn $sbatch -N1 -t2 --gres=craynetwork:0 --output=$file_out $file_in
 expect {
 	-re "Submitted batch job ($number)" {
 		set job_id $expect_out(1,string)
@@ -149,25 +153,30 @@
 # Delete left-over input script
 # Build another input script file
 # Run one more step than allocated CPUs with immediate option and make aborts
-# The "sleep 4" is meant to insure the earlier job steps start first
+# The "sleep" is meant to insure the earlier job steps start first
 #
 exec $bin_rm -f $file_in $file_out
 make_bash_script $file_in "
   inx=0
+  if \[ \$SLURM_TASKS_PER_NODE -gt 32 \]; then
+    sleep_secs=45
+  else
+    sleep_secs=10
+  fi
   while \[ \$inx -lt \$SLURM_TASKS_PER_NODE \]
   do
-    $srun --exclusive -n1 $bin_sleep $sleep_secs &
+    $srun --exclusive -n1 --mem=0 $bin_sleep \$sleep_secs &
     inx=\$((inx+1))
   done
   $bin_sleep 4
-  $srun -v --exclusive -n1 --immediate $file_in2 &
+  $srun -v --exclusive -n1 --mem=0 --immediate $file_in2 &
   wait
 "
 
 #
 # Spawn a job via sbatch
 #
-spawn $sbatch -N1 -t1 --gres=craynetwork:0 --output=$file_out2 $file_in
+spawn $sbatch -N1 -t2 --gres=craynetwork:0 --output=$file_out2 $file_in
 expect {
 	-re "Submitted batch job ($number)" {
 		set job_id $expect_out(1,string)
diff -Nru slurm-llnl-16.05.8/testsuite/expect/test12.2 slurm-llnl-16.05.9/testsuite/expect/test12.2
--- slurm-llnl-16.05.8/testsuite/expect/test12.2	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/testsuite/expect/test12.2	2017-01-31 20:56:34.000000000 +0100
@@ -186,7 +186,7 @@
 	# Compute error in MB
 	set diff_io [expr $max_disk_write - $max_disk_read]
 	set error_io [expr abs($diff_io)]
-	if { $error_io > 0.05 } {
+	if { $error_io > 0.3 } {
 		send_user "\nFAILURE: written file size does not match read size "
 		send_user "file_size:$mb_file_size MB "
 		send_user "max_disk_write:$max_disk_write MB "
@@ -196,7 +196,7 @@
 
 	set diff_io [expr $ave_disk_write - $ave_disk_read]
 	set error_io [expr abs($diff_io)] 
-	if { $error_io > 0.05 } {
+	if { $error_io > 0.3 } {
 		send_user "\nFAILURE: average written file size "
 		send_user "does not match average read size "
 		send_user "file_size:$mb_file_size MB "
diff -Nru slurm-llnl-16.05.8/testsuite/expect/test14.10 slurm-llnl-16.05.9/testsuite/expect/test14.10
--- slurm-llnl-16.05.8/testsuite/expect/test14.10	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/testsuite/expect/test14.10	2017-01-31 20:56:34.000000000 +0100
@@ -91,10 +91,10 @@
 $srun -N1 -n1 -w$node2 ./$file_in2
 
 echo -n \"Checking node 1: \"
-$srun -N1 -n1 -w$node2 ls /tmp/$node2/test$test_id\_file
+$srun -Q -N1 -n1 -w$node2 ls /tmp/$node2/test$test_id\_file
 
 echo -n \"Checking node 0: \"
-$srun -N1 -n1 -w$node1 ls /tmp/$node1/test$test_id\_file
+$srun -Q -N1 -n1 -w$node1 ls /tmp/$node1/test$test_id\_file
 
 $srun $bin_rm -f /tmp/$node1/test$test_id\_file
 $srun $bin_rm -fr /tmp/$node1
@@ -107,6 +107,7 @@
 "
 
 # Make allocations
+set timeout $max_job_delay
 set matches 0
 spawn $salloc -N2 -w$hostlist -t1 ./$file_in1
 expect {
diff -Nru slurm-llnl-16.05.8/testsuite/expect/test1.52 slurm-llnl-16.05.9/testsuite/expect/test1.52
--- slurm-llnl-16.05.8/testsuite/expect/test1.52	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/testsuite/expect/test1.52	2017-01-31 20:56:34.000000000 +0100
@@ -37,7 +37,7 @@
 set exit_code   0
 set num_nodes   2
 set num_tasks   2
-set node_count  0
+set idle_nodes  0
 set max_nodes   0
 set task_count  0
 set hostfile    "test$test_id.hostfile"
@@ -71,10 +71,6 @@
 		set max_nodes 999999
 		exp_continue
 	}
-	-re "TotalNodes=($number)" {
-		set node_count $expect_out(1,string)
-		exp_continue
-	}
 	timeout {
 		send_user "\nFAILURE: scontrol not responding\n"
 		exit 1
@@ -83,8 +79,14 @@
 		wait
 	}
 }
-if { ($node_count < 3) || ($max_nodes < 3) } {
-	send_user "WARNING: system must have at least 3 nodes to run this test on. $node_count $max_nodes\n"
+
+set idle_nodes [available_nodes $def_part idle]
+if { ($idle_nodes < 3) || ($max_nodes < 3) } {
+	if { $max_nodes == 999999 } {
+		send_user "WARNING: partition $def_part must have at least 3 idle nodes and MaxNodes >= 3 to run this test on. IDLE:$idle_nodes MaxNodes:UNLIMITED\n"
+	} else {
+		send_user "WARNING: partition $def_part must have at least 3 idle nodes and MaxNodes >= 3 to run this test on. IDLE:$idle_nodes MaxNodes:$max_nodes\n"
+	}
 	exit $exit_code
 }
 
diff -Nru slurm-llnl-16.05.8/testsuite/expect/test15.22 slurm-llnl-16.05.9/testsuite/expect/test15.22
--- slurm-llnl-16.05.8/testsuite/expect/test15.22	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/testsuite/expect/test15.22	2017-01-31 20:56:34.000000000 +0100
@@ -131,9 +131,15 @@
 #
 # Submit job explicitly to a non-default partition
 #
-set job_id           0
+set job_id          0
+set legit_failure   0
 set salloc_pid [spawn $salloc --partition=$other_part_name -t1 $bin_sleep 1]
 expect {
+	-re "Required node not available" {
+		set legit_failure 1
+		exec $bin_kill -INT $salloc_pid
+		exp_continue
+	}
 	-re "Granted job allocation ($number)" {
 		set job_id $expect_out(1,string)
 		exp_continue
@@ -151,7 +157,9 @@
 	}
 }
 # Confirm the job's partition
-if {$job_id == 0} {
+if {$legit_failure == 1} {
+	send_user "\nWARNING: partition '$other_part_name' is not usable\n"
+} elseif {$job_id == 0} {
 	send_user "\nFAILURE: batch submit failure\n"
 	set exit_code 1
 } else {
diff -Nru slurm-llnl-16.05.8/testsuite/expect/test1.63 slurm-llnl-16.05.9/testsuite/expect/test1.63
--- slurm-llnl-16.05.8/testsuite/expect/test1.63	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/testsuite/expect/test1.63	2017-01-31 20:56:34.000000000 +0100
@@ -72,6 +72,7 @@
 	}
 	-re "Hello World!" {
 		incr match_run
+		sleep 0.1
 		exec $bin_kill -INT $srun_pid
 		exp_continue
 	}
diff -Nru slurm-llnl-16.05.8/testsuite/expect/test17.34 slurm-llnl-16.05.9/testsuite/expect/test17.34
--- slurm-llnl-16.05.8/testsuite/expect/test17.34	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/testsuite/expect/test17.34	2017-01-31 20:56:34.000000000 +0100
@@ -62,8 +62,14 @@
 	} else {
 		set task_limit 1
 	}
+
+	set ntasks [expr abs($task_limit + $task)]
+	if {$ntasks == 0} {
+		set ntasks 1
+	}
+
 	set error_chk 0
-	spawn $sbatch -t1 -w$node -S$core_spec -n[expr abs($task_limit + $task)] -o$file_out $spec_in
+	spawn $sbatch -t1 -w$node -S$core_spec -n$ntasks -o$file_out $spec_in
 	expect {
 		-re "Submitted batch job ($number)" {
 			set job_id $expect_out(1,string)
@@ -156,6 +162,19 @@
 
 print_header $test_id
 
+set select_type [test_select_type]
+if {![string compare $select_type "linear"] || ![string compare $select_type "serial"]} {
+	send_user "\nWARNING: This test is incompatible with select/$select_type\n"
+	exit 0
+} elseif {![string compare $select_type "cray"] && ![test_select_type_params "other_cons_res"]} {
+	send_user "\nWARNING: This test is incompatible with select/linear\n"
+	exit 0
+}
+if {[test_select_type_params "CR_SOCKET"]} {
+	send_user "\nWARNING: This test is incompatible with CR_SOCKET allocations\n"
+	exit 0
+}
+
 log_user 0
 set allow_spec 0
 spawn $scontrol show config
@@ -178,16 +197,6 @@
 	exit $exit_code
 }
 
-set select_type [test_select_type]
-if {![string compare $select_type "linear"]} {
-	send_user "\nWARNING: This test is incompatible with select/$select_type\n"
-	exit 0
-}
-if {[test_select_type_params "CR_SOCKET"]} {
-	send_user "\nWARNING: This test is incompatible with CR_SOCKET allocations\n"
-	exit 0
-}
-
 # Remove any vestigial files
 exec $bin_rm -f $file_in $file_out $spec_in
 
diff -Nru slurm-llnl-16.05.8/testsuite/expect/test17.39 slurm-llnl-16.05.9/testsuite/expect/test17.39
--- slurm-llnl-16.05.8/testsuite/expect/test17.39	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/testsuite/expect/test17.39	2017-01-31 20:56:34.000000000 +0100
@@ -35,8 +35,6 @@
 set slow_id       0
 set fast_id       0
 set dep_id        0
-set slow_job      "test$test_id\_slow_sc"
-set fast_job      "test$test_id\_fast_sc"
 set exit_code     0
 
 print_header $test_id
@@ -56,9 +54,6 @@
 	}
 }
 
-make_bash_script $slow_job "sleep 120"
-make_bash_script $fast_job "sleep 30"
-
 proc check_state {id state} {
 	global squeue exit_code
 
@@ -85,14 +80,8 @@
 	}
 }
 
-if {[test_select_type_params "MEMORY"]} {
-	set job_mem  10
-} else {
-	set job_mem  1
-}
-
 # Submit job 1 of 3
-spawn $sbatch -t3 -o/dev/null --mem=${job_mem} $slow_job
+spawn $sbatch -t3 -o /dev/null --wrap "sleep 120"
 expect {
 	-re "Submitted batch job ($number)" {
 		set slow_id $expect_out(1,string)
@@ -112,7 +101,7 @@
 }
 
 # Submit job 2 of 3
-spawn $sbatch -t3 -o/dev/null --mem=${job_mem} $fast_job
+spawn $sbatch -t3 -o /dev/null --wrap "sleep 30"
 expect {
 	-re "Node count specification invalid" {
 		send_user "\nWARNING: can't test with less than two nodes\n"
@@ -136,7 +125,7 @@
 }
 
 # Submit dependency job, 3 of 3
-spawn $sbatch --dependency=afterok:$slow_id?afterok:$fast_id -o/dev/null --mem=${job_mem} $slow_job
+spawn $sbatch --dependency=afterok:$slow_id?afterok:$fast_id -o /dev/null --wrap "sleep 120"
 expect {
 	-re "Submitted batch job ($number)" {
 		set dep_id $expect_out(1,string)
@@ -197,7 +186,6 @@
 cancel_job $dep_id
 
 if {$exit_code == 0} {
-	exec $bin_rm -f $slow_job $fast_job
 	send_user "\nSUCCESS\n"
 }
 exit $exit_code
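
test17.39 is one of several tests (test2.8, test28.7 and test3.15
follow the same pattern below) that now hand the job body directly to
sbatch with --wrap instead of writing a temporary script, so no
scratch file has to be created or cleaned up afterwards. Standalone,
the idiom is simply:

  # --wrap embeds the command in a generated "sh" wrapper script,
  # so no script file is needed on disk.
  sbatch -t3 -o /dev/null --wrap "sleep 120"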
diff -Nru slurm-llnl-16.05.8/testsuite/expect/test1.74 slurm-llnl-16.05.9/testsuite/expect/test1.74
--- slurm-llnl-16.05.8/testsuite/expect/test1.74	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/testsuite/expect/test1.74	2017-01-31 20:56:34.000000000 +0100
@@ -65,6 +65,10 @@
 	send_user "\nWARNING: This test is incompatible with serial systems\n"
 	exit $exit_code
 }
+if {[string compare [check_accounting_admin_level] "Administrator"]} {
+	send_user "\nThis test can't be run without being an Accounting administrator.\n"
+	exit $exit_code
+}
 
 spawn $bin_id -u -n
 expect {
diff -Nru slurm-llnl-16.05.8/testsuite/expect/test17.40 slurm-llnl-16.05.9/testsuite/expect/test17.40
--- slurm-llnl-16.05.8/testsuite/expect/test17.40	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/testsuite/expect/test17.40	2017-01-31 20:56:34.000000000 +0100
@@ -156,6 +156,19 @@
 
 print_header $test_id
 
+set select_type [test_select_type]
+if {![string compare $select_type "linear"] || ![string compare $select_type "serial"]} {
+	send_user "\nWARNING: This test is incompatible with select/$select_type\n"
+	exit 0
+} elseif {![string compare $select_type "cray"] && ![test_select_type_params "other_cons_res"]} {
+	send_user "\nWARNING: This test is incompatible with select/linear\n"
+	exit 0
+}
+if {[test_select_type_params "CR_SOCKET"]} {
+	send_user "\nWARNING: This test is incompatible with CR_SOCKET allocations\n"
+	exit 0
+}
+
 log_user 0
 set allow_spec 0
 spawn $scontrol show config
@@ -178,17 +191,6 @@
 	exit $exit_code
 }
 
-set select_type [test_select_type]
-if {![string compare $select_type "linear"] || ![string compare $select_type "serial"]} {
-	send_user "\nWARNING: This test is incompatible with select/$select_type\n"
-	exit 0
-}
-
-if {[test_select_type_params "CR_SOCKET"]} {
-	send_user "\nWARNING: This test is incompatible with CR_SOCKET allocations\n"
-	exit 0
-}
-
 # Remove any vestigial files
 exec $bin_rm -f $file_in $file_out $spec_in
 
diff -Nru slurm-llnl-16.05.8/testsuite/expect/test21.36 slurm-llnl-16.05.9/testsuite/expect/test21.36
--- slurm-llnl-16.05.8/testsuite/expect/test21.36	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/testsuite/expect/test21.36	2017-01-31 20:56:34.000000000 +0100
@@ -81,6 +81,10 @@
 	send_user "\nThis test can't be run without AccountStorageType=slurmdbd\n"
 	exit 0
 }
+if {[string compare [check_accounting_admin_level] "Administrator"]} {
+	send_user "\nThis test can't be run without being an Accounting administrator.\n"
+	exit 0
+}
 
 # Remove pre-existing items
 cleanup
diff -Nru slurm-llnl-16.05.8/testsuite/expect/test2.8 slurm-llnl-16.05.9/testsuite/expect/test2.8
--- slurm-llnl-16.05.8/testsuite/expect/test2.8	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/testsuite/expect/test2.8	2017-01-31 20:56:34.000000000 +0100
@@ -35,7 +35,6 @@
 
 set test_id     "2.8"
 set exit_code   0
-set file_in     "test$test_id.input"
 set is_bluegene 0
 set job_id1     0
 set job_id2     0
@@ -65,26 +64,10 @@
 	set step_id 0
 }
 
-if {[test_select_type_params "MEMORY"]} {
-	set job_mem  20
-	set step_mem 10
-} else {
-	set job_mem  1
-	set step_mem 1
-}
-
-#
-# Build input script file
-#
-make_bash_script $file_in "
-  $srun --mem=${step_mem} $bin_sleep 60 &
-  $srun --mem=${step_mem} $bin_sleep 60
-"
-
 #
 # Submit a couple jobs so we have something to work with
 #
-set sbatch_pid [spawn $sbatch --output=/dev/null --error=/dev/null -t5 --mem=${job_mem} $file_in]
+set sbatch_pid [spawn $sbatch --output=/dev/null --error=/dev/null -t5 --wrap "$srun $bin_sleep 60"]
 expect {
 	-re "Submitted batch job ($number)" {
 		set job_id1 $expect_out(1,string)
@@ -104,7 +87,7 @@
 	exit 1
 }
 
-set sbatch_pid [spawn $sbatch --output=/dev/null --error=/dev/null -t5 --mem=${job_mem} $file_in]
+set sbatch_pid [spawn $sbatch --output=/dev/null --error=/dev/null -t5 --wrap "$srun $bin_sleep 60"]
 expect {
 	-re "Submitted batch job ($number)" {
 		set job_id2 $expect_out(1,string)
@@ -126,8 +109,6 @@
 	exit 1
 }
 
-exec $bin_rm -f $file_in
-
 if {[wait_for_job $job_id1 "RUNNING"] != 0} {
         send_user "\nFAILURE: waiting for job $job_id1 to start\n"
         cancel_job $job_id1
@@ -451,4 +432,3 @@
 	send_user "\nSUCCESS\n"
 }
 exit $exit_code
-
diff -Nru slurm-llnl-16.05.8/testsuite/expect/test28.7 slurm-llnl-16.05.9/testsuite/expect/test28.7
--- slurm-llnl-16.05.8/testsuite/expect/test28.7	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/testsuite/expect/test28.7	2017-01-31 20:56:34.000000000 +0100
@@ -33,7 +33,6 @@
 set test_id       "28.7"
 set exit_code     0
 set array_size    3
-set script        "test$test_id\.bash"
 set top_array_task_id [expr $array_size - 1]
 
 print_header $test_id
@@ -43,20 +42,11 @@
 	exit 0
 }
 
-if {[test_select_type_params "MEMORY"]} {
-	set job_mem  10
-} else {
-	set job_mem  1
-}
-
-exec $bin_rm -f $script
-make_bash_script $script "sleep \$(( ( RANDOM % 10 ) + 1 ))"
-
 #
 # Submit a job array for first dependency test
 #
 set job_id 0
-spawn $sbatch -N1 -t1 -a 0-$top_array_task_id -o /dev/null -e /dev/null --mem=${job_mem} $script
+spawn $sbatch -N1 -t1 -a 0-$top_array_task_id -o /dev/null -e /dev/null --wrap "sleep \$(( ( RANDOM % 10 ) + 1 ))"
 expect {
 	-re "Submitted batch job ($number)" {
 		set job_id $expect_out(1,string)
@@ -80,7 +70,7 @@
 #
 set timeout $max_job_delay
 set match_job_state 0
-set srun_pid [spawn $srun -t1 --dependency=afterany:$job_id\_$top_array_task_id --mem=${job_mem} $scontrol show job $job_id\_$top_array_task_id]
+set srun_pid [spawn $srun -t1 --dependency=afterany:$job_id\_$top_array_task_id $scontrol show job $job_id\_$top_array_task_id]
 expect {
 	-re "JobState=COMPLETED|COMPLETING" {
 		set match_job_state 1
@@ -108,7 +98,7 @@
 # Submit a job array for second dependency test
 #
 set job_id 0
-spawn $sbatch -N1 -t1 -a 0-[expr $array_size - 1] -o /dev/null -e /dev/null --mem=${job_mem} $script
+spawn $sbatch -N1 -t1 -a 0-[expr $array_size - 1] -o /dev/null -e /dev/null --wrap "sleep \$(( ( RANDOM % 10 ) + 1 ))"
 expect {
 	-re "Submitted batch job ($number)" {
 		set job_id $expect_out(1,string)
@@ -132,7 +122,7 @@
 #
 set timeout $max_job_delay
 set match_job_state 0
-set srun_pid [spawn $srun -t1 --dependency=afterany:$job_id --mem=${job_mem} $scontrol show job $job_id]
+set srun_pid [spawn $srun -t1 --dependency=afterany:$job_id $scontrol show job $job_id]
 expect {
 	-re "JobState=COMPLETED|COMPLETING" {
 		incr match_job_state
@@ -154,7 +144,6 @@
 
 cancel_job $job_id
 if {$exit_code == 0} {
-	exec $bin_rm -f $script
 	send_user "\nSUCCESS\n"
 }
 exit $exit_code
diff -Nru slurm-llnl-16.05.8/testsuite/expect/test3.15 slurm-llnl-16.05.9/testsuite/expect/test3.15
--- slurm-llnl-16.05.8/testsuite/expect/test3.15	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/testsuite/expect/test3.15	2017-01-31 20:56:34.000000000 +0100
@@ -32,7 +32,6 @@
 
 set test_id		"3.15"
 set exit_code		0
-set script_name         "test$test_id.bash"
 set license_name        "test$test_id"
 set resv_name           "resv$test_id"
 set user_name		""
@@ -57,10 +56,9 @@
 }
 
 proc submit_job { license_count } {
-	global script_name bin_sleep license_name sbatch number exit_code job_mem
+	global bin_sleep license_name sbatch number exit_code
 	set job_id 0
-	make_bash_script $script_name "$bin_sleep 300"
-	spawn $sbatch -n1 -t1 -o /dev/null -L $license_name:$license_count --mem=${job_mem} $script_name
+	spawn $sbatch -n1 -t1 -o /dev/null -L $license_name:$license_count --wrap "$bin_sleep 300"
 	expect {
 		-re "Submitted batch job ($number)" {
 			set job_id $expect_out(1,string)
@@ -187,12 +185,6 @@
 	exit $exit_code
 }
 
-if {[test_select_type_params "MEMORY"]} {
-	set job_mem  10
-} else {
-	set job_mem  1
-}
-
 spawn $bin_id -un
 expect {
 	-re "($alpha_numeric_under)" {
@@ -378,7 +370,7 @@
 reconfigure
 
 if {$exit_code == 0} {
-	exec $bin_rm -f $cwd/slurm.conf.orig $script_name
+	exec $bin_rm -f $cwd/slurm.conf.orig
 	send_user "\nSUCCESS\n"
 } else {
 	send_user "\nFAILURE\n"
diff -Nru slurm-llnl-16.05.8/testsuite/expect/test35.2 slurm-llnl-16.05.9/testsuite/expect/test35.2
--- slurm-llnl-16.05.8/testsuite/expect/test35.2	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/testsuite/expect/test35.2	2017-01-31 20:56:34.000000000 +0100
@@ -42,6 +42,28 @@
 set script_use  "test$test_id.use.bash"
 set tmp_file    "test$test_id"
 
+#
+# get my uid and clear any vestigial triggers
+#
+set uid -1
+spawn $bin_id -u
+expect {
+	-re "($number)" {
+		set uid $expect_out(1,string)
+		exp_continue
+	}
+	eof {
+		wait
+	}
+}
+if {$uid == -1} {
+	send_user "\nCan't get my uid\n"
+	exit 1
+} elseif {$uid == 0} {
+	send_user "\nWARNING: Can't run this test as user root\n"
+	exit 0
+}
+
 proc find_bb_jobid { fname bb_jobid } {
 	global bin_cat
 
@@ -142,6 +164,9 @@
 	set exit_code 1
 }
 
+# Wait for purge of buffer to complete
+sleep 10
+
 set found 0
 spawn $scontrol show burst
 expect {
diff -Nru slurm-llnl-16.05.8/testsuite/expect/test5.9 slurm-llnl-16.05.9/testsuite/expect/test5.9
--- slurm-llnl-16.05.8/testsuite/expect/test5.9	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/testsuite/expect/test5.9	2017-01-31 20:56:34.000000000 +0100
@@ -98,6 +98,10 @@
 	send_user "\nWARNING: This test is incompatible with serial systems\n"
 	exit $exit_code
 }
+if {[string compare [check_accounting_admin_level] "Administrator"]} {
+	send_user "\nThis test can't be run without being an Accounting administrator.\n"
+	exit $exit_code
+}
 set available [available_nodes $partition idle]
 if {$available < 2} {
     send_user "\nWARNING: not enough nodes currently available ($available avail, 2 needed)\n"
diff -Nru slurm-llnl-16.05.8/testsuite/expect/test7.11 slurm-llnl-16.05.9/testsuite/expect/test7.11
--- slurm-llnl-16.05.8/testsuite/expect/test7.11	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/testsuite/expect/test7.11	2017-01-31 20:56:34.000000000 +0100
@@ -69,10 +69,12 @@
 #
 log_user 0
 set config_dir ""
+set ctld_slurm_ver ""
 spawn $scontrol show config
 expect {
-	-re "SLURM_CONF.*= (/.*)/slurm.conf.*SLURM_VERSION" {
+	-re "SLURM_CONF.*= (.*)/slurm.conf.*SLURM_VERSION *= ($float)" {
 		set config_dir $expect_out(1,string)
+		set ctld_slurm_ver $expect_out(2,string)
 		exp_continue
 	}
 	eof {
@@ -84,6 +86,27 @@
 	send_user "\nFAILURE: Could not locate slurm.conf directory\n"
 	exit 1
 }
+
+log_user 0
+set loc_slurm_ver ""
+spawn $scontrol -V
+expect {
+	-re "slurm ($float)" {
+		set loc_slurm_ver $expect_out(1,string)
+		exp_continue
+	}
+	eof {
+		wait
+	}
+}
+log_user 1
+
+if {[string compare $ctld_slurm_ver $loc_slurm_ver]} {
+	send_user "\nWARNING: slurmctld ($ctld_slurm_ver) and local Slurm ($loc_slurm_ver) versions are not the same, can not continue.\n"
+	exit 0
+}
+
+
 set spank_conf_file ${config_dir}/plugstack.conf
 exec $bin_rm -f $orig_spank_conf $new_spank_conf $file_out $spank_out
 if {[file exists $spank_conf_file]} {
@@ -120,10 +143,6 @@
 	}
 }
 
-# Allow enough time for configuration file in NFS to be propagated
-# to all nodes of cluster
-exec sleep 60
-
 #
 # Test of srun help message
 #
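
The new guard in test7.11 refuses to run when the controller and the
local tools disagree on the Slurm version. Outside the test harness
the same check could be sketched as follows (field names taken from
the scontrol output the test already parses; versions are compared
textually, as in the test):

  # Version reported by slurmctld via the config dump.
  ctld_ver=$(scontrol show config | awk '/^SLURM_VERSION/ {print $3}')
  # Version of the locally installed command-line tools.
  local_ver=$(scontrol -V | awk '{print $2}')
  if [ "$ctld_ver" != "$local_ver" ]; then
    echo "WARNING: slurmctld ($ctld_ver) and local Slurm ($local_ver) differ" >&2
    exit 0
  fi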
diff -Nru slurm-llnl-16.05.8/testsuite/expect/test7.13 slurm-llnl-16.05.9/testsuite/expect/test7.13
--- slurm-llnl-16.05.8/testsuite/expect/test7.13	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/testsuite/expect/test7.13	2017-01-31 20:56:34.000000000 +0100
@@ -166,7 +166,7 @@
 		}
 	}
 	if {$matches != 4} {
-		send_user "\nFAILURE: sacct of $job_id failed ($matches != 5)\n"
+		send_user "\nFAILURE: sacct of $job_id failed ($matches != 4)\n"
 		exit 1
 	}
 }
