
Bug#861283: marked as done (unblock: slurm-llnl/16.05.9-1)



Your message dated Sun, 07 May 2017 20:03:00 +0000
with message-id <fa818182-5422-31a8-20f3-c1b75aba205f@thykier.net>
and subject line Re: unblock: slurm-llnl/16.05.9-1
has caused the Debian Bug report #861283,
regarding unblock: slurm-llnl/16.05.9-1
to be marked as done.

This means that you claim that the problem has been dealt with.
If this is not the case, it is now your responsibility to reopen the
Bug report if necessary, and/or fix the problem forthwith.

(NB: If you are a system administrator and have no idea what this
message is talking about, this may indicate a serious mail system
misconfiguration somewhere. Please contact owner@bugs.debian.org
immediately.)


-- 
861283: http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=861283
Debian Bug Tracking System
Contact owner@bugs.debian.org with problems
--- Begin Message ---
Package: release.debian.org
Severity: normal
User: release.debian.org@packages.debian.org
Usertags: unblock

Slurm 16.05.9-1 was uploaded to Unstable a while ago and is a bug-fix
release. The diff is large, but it consists of many individual fixes (see
the summary in upstream's NEWS file), and Slurm minor releases have always
been considered safe. Besides, Slurm 16.05.9-1 has now been in Unstable for
a while without issues.

Can you please consider unblocking slurm-llnl?
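
In case it helps review, the attached diff between the two upstream source
trees can be reproduced roughly as follows (a sketch; it assumes both source
trees are unpacked side by side, as suggested by the per-file diff headers
below):

  # Compare the 16.05.8 and 16.05.9 trees recursively (-r), in unified
  # format (-u), treating files present in only one tree as empty (-N):
  diff -Nru slurm-llnl-16.05.8 slurm-llnl-16.05.9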

-- System Information:
Debian Release: 9.0
  APT prefers testing
  APT policy: (990, 'testing'), (500, 'unstable'), (1, 'experimental')
Architecture: amd64
 (x86_64)
Foreign Architectures: i386

Kernel: Linux 4.9.0-2-amd64 (SMP w/4 CPU cores)
Locale: LANG=en_US.UTF-8, LC_CTYPE=en_US.UTF-8 (charmap=UTF-8)
Shell: /bin/sh linked to /bin/dash
Init: systemd (via /run/systemd/system)
diff -Nru slurm-llnl-16.05.8/debian/changelog slurm-llnl-16.05.9/debian/changelog
--- slurm-llnl-16.05.8/debian/changelog	2017-01-07 02:40:23.000000000 +0100
+++ slurm-llnl-16.05.9/debian/changelog	2017-02-03 09:50:02.000000000 +0100
@@ -1,3 +1,10 @@
+slurm-llnl (16.05.9-1) unstable; urgency=medium
+
+  * New upstream release
+  * Overrides spelling-error-in-binary false positives
+
+ -- Gennaro Oliva <oliva.g@na.icar.cnr.it>  Fri, 03 Feb 2017 09:50:02 +0100
+
 slurm-llnl (16.05.8-1) unstable; urgency=medium
 
   * New upstream release 
diff -Nru slurm-llnl-16.05.8/debian/libslurm30.lintian-overrides slurm-llnl-16.05.9/debian/libslurm30.lintian-overrides
--- slurm-llnl-16.05.8/debian/libslurm30.lintian-overrides	2017-01-04 23:42:58.000000000 +0100
+++ slurm-llnl-16.05.9/debian/libslurm30.lintian-overrides	2017-02-02 09:41:24.000000000 +0100
@@ -12,3 +12,4 @@
 # This happens because because slurm_job_preempt_mode is contained in
 # /usr/sbin/slurmctld and will never be referenced when running sinfo.
 hardening-no-bindnow
+spelling-error-in-binary
diff -Nru slurm-llnl-16.05.8/debian/libslurmdb30.lintian-overrides slurm-llnl-16.05.9/debian/libslurmdb30.lintian-overrides
--- slurm-llnl-16.05.8/debian/libslurmdb30.lintian-overrides	2017-01-04 23:42:58.000000000 +0100
+++ slurm-llnl-16.05.9/debian/libslurmdb30.lintian-overrides	2017-02-02 09:41:24.000000000 +0100
@@ -12,3 +12,4 @@
 # This happens because because slurm_job_preempt_mode is contained in
 # /usr/sbin/slurmctld and will never be referenced when running sinfo.
 hardening-no-bindnow
+spelling-error-in-binary
diff -Nru slurm-llnl-16.05.8/debian/slurm-client-emulator.lintian-overrides slurm-llnl-16.05.9/debian/slurm-client-emulator.lintian-overrides
--- slurm-llnl-16.05.8/debian/slurm-client-emulator.lintian-overrides	2017-01-04 23:42:58.000000000 +0100
+++ slurm-llnl-16.05.9/debian/slurm-client-emulator.lintian-overrides	2017-02-02 09:41:24.000000000 +0100
@@ -1 +1,2 @@
 slurm-client-emulator: hardening-no-bindnow
+spelling-error-in-binary
diff -Nru slurm-llnl-16.05.8/debian/slurm-client.lintian-overrides slurm-llnl-16.05.9/debian/slurm-client.lintian-overrides
--- slurm-llnl-16.05.8/debian/slurm-client.lintian-overrides	2017-01-04 23:42:58.000000000 +0100
+++ slurm-llnl-16.05.9/debian/slurm-client.lintian-overrides	2017-02-02 09:41:24.000000000 +0100
@@ -1,3 +1,4 @@
 slurm-client: manpage-has-errors-from-man
 slurm-client: conflicts-with-version
 slurm-client: hardening-no-bindnow
+spelling-error-in-binary
diff -Nru slurm-llnl-16.05.8/debian/slurmctld.lintian-overrides slurm-llnl-16.05.9/debian/slurmctld.lintian-overrides
--- slurm-llnl-16.05.8/debian/slurmctld.lintian-overrides	2017-01-04 23:42:58.000000000 +0100
+++ slurm-llnl-16.05.9/debian/slurmctld.lintian-overrides	2017-02-02 09:41:24.000000000 +0100
@@ -1,2 +1,3 @@
 slurmctld: possible-documentation-but-no-doc-base-registration
 slurmctld: hardening-no-bindnow
+spelling-error-in-binary
diff -Nru slurm-llnl-16.05.8/debian/slurmdbd.lintian-overrides slurm-llnl-16.05.9/debian/slurmdbd.lintian-overrides
--- slurm-llnl-16.05.8/debian/slurmdbd.lintian-overrides	2017-01-04 23:42:58.000000000 +0100
+++ slurm-llnl-16.05.9/debian/slurmdbd.lintian-overrides	2017-02-02 09:41:24.000000000 +0100
@@ -1 +1,2 @@
 slurmdbd: hardening-no-bindnow
+spelling-error-in-binary
diff -Nru slurm-llnl-16.05.8/debian/slurmd.lintian-overrides slurm-llnl-16.05.9/debian/slurmd.lintian-overrides
--- slurm-llnl-16.05.8/debian/slurmd.lintian-overrides	2017-01-04 23:42:58.000000000 +0100
+++ slurm-llnl-16.05.9/debian/slurmd.lintian-overrides	2017-02-02 09:41:24.000000000 +0100
@@ -1 +1,2 @@
 slurmd: hardening-no-bindnow
+spelling-error-in-binary
diff -Nru slurm-llnl-16.05.8/debian/slurm-wlm-emulator.lintian-overrides slurm-llnl-16.05.9/debian/slurm-wlm-emulator.lintian-overrides
--- slurm-llnl-16.05.8/debian/slurm-wlm-emulator.lintian-overrides	2017-01-04 23:42:58.000000000 +0100
+++ slurm-llnl-16.05.9/debian/slurm-wlm-emulator.lintian-overrides	2017-02-02 09:41:24.000000000 +0100
@@ -1 +1,2 @@
 slurm-wlm-emulator: hardening-no-bindnow
+spelling-error-in-binary
diff -Nru slurm-llnl-16.05.8/debian/sview.lintian-overrides slurm-llnl-16.05.9/debian/sview.lintian-overrides
--- slurm-llnl-16.05.8/debian/sview.lintian-overrides	2017-01-04 23:42:58.000000000 +0100
+++ slurm-llnl-16.05.9/debian/sview.lintian-overrides	2017-02-02 09:41:24.000000000 +0100
@@ -1 +1,2 @@
 sview: hardening-no-bindnow
+spelling-error-in-binary
diff -Nru slurm-llnl-16.05.8/doc/html/prolog_epilog.shtml slurm-llnl-16.05.9/doc/html/prolog_epilog.shtml
--- slurm-llnl-16.05.8/doc/html/prolog_epilog.shtml	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/doc/html/prolog_epilog.shtml	2017-01-31 20:56:34.000000000 +0100
@@ -130,7 +130,7 @@
 		</tr>
 	</tbody></table>
 </center>
-
+<br>
 <p>This second table below identifies what prologs and epilogs are available for job
 step allocations, when and where they run.</p>
 
diff -Nru slurm-llnl-16.05.8/doc/html/publications.shtml slurm-llnl-16.05.9/doc/html/publications.shtml
--- slurm-llnl-16.05.8/doc/html/publications.shtml	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/doc/html/publications.shtml	2017-01-31 20:56:34.000000000 +0100
@@ -305,6 +305,8 @@
 Yiannis Georgiou and David Glesser (Bull),
 Krzysztof Rzadca (University of Warsaw),
 Denis Trystram (University Grenoble-Alpes)</li>
+
+<li><a href="SUG14/data_movement.pdf">High Performance Data movement between Lustre and Enterprise storage systems</a>
 Aamir Rashid (Terascala)</li>
 
 <li><a href="SUG14/remote_gpu.pdf">Extending Slurm with Support for Remote GPU Virtualization</a>
@@ -775,6 +777,6 @@
 Learning Chef: Compute Cluter with Slurm</a>
 A Slurm Cookbook by Adam DeConinck</p>
 
-<p style="text-align:center;">Last modified 29 November 2016</p>
+<p style="text-align:center;">Last modified 12 January 2017</p>
 
 <!--#include virtual="footer.txt"-->
diff -Nru slurm-llnl-16.05.8/doc/html/reset.css slurm-llnl-16.05.9/doc/html/reset.css
--- slurm-llnl-16.05.8/doc/html/reset.css	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/doc/html/reset.css	2017-01-31 20:56:34.000000000 +0100
@@ -6,7 +6,7 @@
 b, u, i, center,
 ol, ul, li,
 fieldset, form, label, legend,
-table, caption, tbody, tfoot, thead, tr, th, td,
+caption, tbody, tfoot, thead, th,
 article, aside, canvas, details, embed,
 figure, figcaption, footer, header, hgroup,
 menu, nav, output, ruby, section, summary,
@@ -44,6 +44,5 @@
 }
 
 table {
-	border-collapse: collapse;
 	border-spacing: 0;
 }
diff -Nru slurm-llnl-16.05.8/doc/html/style.css slurm-llnl-16.05.9/doc/html/style.css
--- slurm-llnl-16.05.8/doc/html/style.css	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/doc/html/style.css	2017-01-31 20:56:34.000000000 +0100
@@ -23,7 +23,6 @@
 .container {
 	margin: 0 auto;
 	padding: 0 18px;
-	max-width: 1400px;
 }
 
 .container--main {
@@ -661,6 +660,7 @@
 @media screen and (min-width: 32em) {
 	.container {
 		padding: 0 36px;
+		max-width: 100%;
 	}
 }
 
@@ -673,6 +673,7 @@
 
 	.container {
 		padding: 0 48px;
+		max-width: 90%;
 	}
 
 	.container--main {
@@ -732,7 +733,7 @@
 	}
 
 	.content .container {
-		padding: 0 8% 0 8%;
+		padding: 0 0 0 100px;
 		margin: 0;
 	}
 
@@ -772,6 +773,9 @@
 
 /* Extra Large Size */
 @media screen and (min-width: 78em) {
-
+	.container {
+		padding: 0 48px;
+		max-width: 90%;
+	}
 }
 
diff -Nru slurm-llnl-16.05.8/META slurm-llnl-16.05.9/META
--- slurm-llnl-16.05.8/META	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/META	2017-01-31 20:56:34.000000000 +0100
@@ -7,8 +7,8 @@
   Name:		slurm
   Major:	16
   Minor:	05
-  Micro:	8
-  Version:	16.05.8
+  Micro:	9
+  Version:	16.05.9
   Release:	1
 # Include leading zero for all pre-releases
 
diff -Nru slurm-llnl-16.05.8/NEWS slurm-llnl-16.05.9/NEWS
--- slurm-llnl-16.05.8/NEWS	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/NEWS	2017-01-31 20:56:34.000000000 +0100
@@ -1,6 +1,46 @@
 This file describes changes in recent versions of Slurm. It primarily
 documents those changes that are of interest to users and administrators.
 
+* Changes in Slurm 16.05.9
+==========================
+ -- Fix parsing of SBCAST_COMPRESS environment variable in sbcast.
+ -- Change some debug messages to errors in task/cgroup plugin.
+ -- backfill scheduler: Stop trying to determine expected start time for a job
+    after 2 seconds of wall time. This can happen if there are many running jobs
+    and a pending job can not be started soon.
+ -- Improve performance of cr_sort_part_rows() in cons_res plugin.
+ -- CRAY - Fix dealock issue when updating accounting in the slurmctld and
+    scheduling a Datawarp job.
+ -- Correct the job state accounting information for jobs requeued due to burst
+    buffer errors.
+ -- burst_buffer/cray - Avoid "pre_run" operation if not using buffer (i.e.
+    just creating or deleting a persistent burst buffer).
+ -- Fix slurm.spec file support for BlueGene builds.
+ -- Fix missing TRES read lock in acct_policy_job_runnable_pre_select() code.
+ -- Fix debug2 message printing value using wrong array index in
+    _qos_job_runnable_post_select().
+ -- Prevent job timeout on node power up.
+ -- MYSQL - Fix minor memory leak when querying steps and the sql fails.
+ -- Make it so sacctmgr accepts column headers like MaxTRESPU and not MaxTRESP.
+ -- Only look at SLURM_STEP_KILLED_MSG_NODE_ID on startup, to avoid race
+    condition later when looking at a steps env.
+ -- Make backfill scheduler behave like regular scheduler in respect to
+    'assoc_limit_stop'.
+ -- Allow a lower version client command to talk to a higher version contoller
+    using the multi-cluster options (e.g. squeue -M<clsuter>).
+ -- slurmctld/agent race condition fix: Prevent job launch while PrologSlurmctld
+    daemon is running or node boot in progress.
+ -- MYSQL - Fix a few other minor memory leaks when uncommon failures occur.
+ -- burst_buffer/cray - Fix race condition that could cause multiple batch job
+    launch requests resulting in drained nodes.
+ -- Correct logic to purge old reservations.
+ -- Fix DBD cache restore from previous versions.
+ -- Fix to logic for getting expected start time of existing job ID with
+    explicit begin time that is in the past.
+ -- Clear job's reason of "BeginTime" in a more timely fashion and/or prevents
+    them from being stuck in a PENDING state.
+ -- Make sure acct policy limits imposed on a job are correct after requeue.
+
 * Changes in Slurm 16.05.8
 ==========================
  -- Remove StoragePass from being printed out in the slurmdbd log at debug2
diff -Nru slurm-llnl-16.05.8/slurm.spec slurm-llnl-16.05.9/slurm.spec
--- slurm-llnl-16.05.8/slurm.spec	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/slurm.spec	2017-01-31 20:56:34.000000000 +0100
@@ -564,7 +564,6 @@
 rm -f $RPM_BUILD_ROOT/%{_libdir}/slurm/auth_none.so
 %endif
 %if ! %{slurm_with bluegene}
-rm -f $RPM_BUILD_ROOT/%{_libdir}/slurm/job_submit_cnode.so
 rm -f $RPM_BUILD_ROOT/%{_libdir}/slurm/libsched_if.so
 rm -f $RPM_BUILD_ROOT/%{_libdir}/slurm/libsched_if64.so
 rm -f $RPM_BUILD_ROOT/%{_libdir}/slurm/runjob_plugin.so
@@ -877,7 +876,6 @@
 %{_sbindir}/slurm_epilog
 %{_sbindir}/slurm_prolog
 %{_sbindir}/sfree
-%{_libdir}/slurm/job_submit_cnode.so
 %config %{_sysconfdir}/bluegene.conf.example
 %endif
 #############################################################################
diff -Nru slurm-llnl-16.05.8/src/common/slurmdbd_defs.c slurm-llnl-16.05.9/src/common/slurmdbd_defs.c
--- slurm-llnl-16.05.8/src/common/slurmdbd_defs.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/common/slurmdbd_defs.c	2017-01-31 20:56:34.000000000 +0100
@@ -2348,21 +2348,16 @@
 		   need to set it back to 0 */
 		set_buf_offset(buffer, 0);
 		safe_unpackstr_xmalloc(&ver_str, &ver_str_len, buffer);
-		if (remaining_buf(buffer))
-			goto unpack_error;
 		debug3("Version string in dbd_state header is %s", ver_str);
+	unpack_error:
 		free_buf(buffer);
 		buffer = NULL;
-	unpack_error:
 		if (ver_str) {
-			char curr_ver_str[10];
-			snprintf(curr_ver_str, sizeof(curr_ver_str),
-				 "VER%d", SLURM_PROTOCOL_VERSION);
-			if (!xstrcmp(ver_str, curr_ver_str))
-				rpc_version = SLURM_PROTOCOL_VERSION;
+			/* get the version after VER */
+			rpc_version = slurm_atoul(ver_str + 3);
+			xfree(ver_str);
 		}
 
-		xfree(ver_str);
 		while (1) {
 			/* If the buffer was not the VER%d string it
 			   was an actual message so we don't want to
diff -Nru slurm-llnl-16.05.8/src/common/slurmdb_pack.c slurm-llnl-16.05.9/src/common/slurmdb_pack.c
--- slurm-llnl-16.05.8/src/common/slurmdb_pack.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/common/slurmdb_pack.c	2017-01-31 20:56:34.000000000 +0100
@@ -809,6 +809,8 @@
 			goto unpack_error;
 
 		safe_unpack16(&object_ptr->rpc_version, buffer);
+		object_ptr->rpc_version = MIN(SLURM_PROTOCOL_VERSION,
+					      object_ptr->rpc_version);
 		safe_unpackstr_xmalloc(&object_ptr->tres_str,
 				       &uint32_tmp, buffer);
 	} else if (rpc_version >= SLURM_MIN_PROTOCOL_VERSION) {
diff -Nru slurm-llnl-16.05.8/src/plugins/accounting_storage/mysql/as_mysql_convert.c slurm-llnl-16.05.9/src/plugins/accounting_storage/mysql/as_mysql_convert.c
--- slurm-llnl-16.05.8/src/plugins/accounting_storage/mysql/as_mysql_convert.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/plugins/accounting_storage/mysql/as_mysql_convert.c	2017-01-31 20:56:34.000000000 +0100
@@ -804,6 +804,7 @@
 			error("No grp_cpus col name in assoc_table "
 			      "for cluster %s, this should never happen",
 			      cluster_name);
+			mysql_free_result(result);
 			continue;
 		}
 
@@ -899,6 +900,7 @@
 		if (!(row = mysql_fetch_row(result)) || !row[0] || !row[0][0]) {
 			error("No count col name for cluster %s, "
 			      "this should never happen", cluster_name);
+			mysql_free_result(result);
 			continue;
 		}
 
diff -Nru slurm-llnl-16.05.8/src/plugins/accounting_storage/mysql/as_mysql_jobacct_process.c slurm-llnl-16.05.9/src/plugins/accounting_storage/mysql/as_mysql_jobacct_process.c
--- slurm-llnl-16.05.8/src/plugins/accounting_storage/mysql/as_mysql_jobacct_process.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/plugins/accounting_storage/mysql/as_mysql_jobacct_process.c	2017-01-31 20:56:34.000000000 +0100
@@ -502,6 +502,7 @@
 		local_cluster_list = setup_cluster_list_with_inx(
 			mysql_conn, job_cond, (void **)&curr_cluster);
 		if (!local_cluster_list) {
+			mysql_free_result(result);
 			rc = SLURM_ERROR;
 			goto end_it;
 		}
@@ -785,6 +786,7 @@
 			      mysql_conn, query, 0))) {
 			xfree(query);
 			rc = SLURM_ERROR;
+			mysql_free_result(result);
 			goto end_it;
 		}
 		xfree(query);
diff -Nru slurm-llnl-16.05.8/src/plugins/accounting_storage/mysql/as_mysql_resource.c slurm-llnl-16.05.9/src/plugins/accounting_storage/mysql/as_mysql_resource.c
--- slurm-llnl-16.05.8/src/plugins/accounting_storage/mysql/as_mysql_resource.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/plugins/accounting_storage/mysql/as_mysql_resource.c	2017-01-31 20:56:34.000000000 +0100
@@ -312,6 +312,7 @@
 
 	if (!(row = mysql_fetch_row(result))) {
 		error("Resource id %u is not known on the system", res_id);
+		mysql_free_result(result);
 		return percent_used;
 	}
 
@@ -383,6 +384,7 @@
 
 	if (!(row = mysql_fetch_row(result))) {
 		error("Resource id %u is not known on the system", res->id);
+		mysql_free_result(result);
 		return SLURM_ERROR;
 	}
 
@@ -1100,6 +1102,8 @@
 
 	if (!query_clusters && !vals) {
 		xfree(clus_vals);
+		if (result)
+			mysql_free_result(result);
 		errno = SLURM_NO_CHANGE_IN_DATA;
 		error("Nothing to change");
 		return NULL;
diff -Nru slurm-llnl-16.05.8/src/plugins/accounting_storage/mysql/as_mysql_rollup.c slurm-llnl-16.05.9/src/plugins/accounting_storage/mysql/as_mysql_rollup.c
--- slurm-llnl-16.05.8/src/plugins/accounting_storage/mysql/as_mysql_rollup.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/plugins/accounting_storage/mysql/as_mysql_rollup.c	2017-01-31 20:56:34.000000000 +0100
@@ -1199,6 +1199,7 @@
 					      mysql_conn,
 					      query, 0))) {
 					rc = SLURM_ERROR;
+					mysql_free_result(result);
 					goto end_it;
 				}
 				xfree(query);
diff -Nru slurm-llnl-16.05.8/src/plugins/burst_buffer/cray/burst_buffer_cray.c slurm-llnl-16.05.9/src/plugins/burst_buffer/cray/burst_buffer_cray.c
--- slurm-llnl-16.05.8/src/plugins/burst_buffer/cray/burst_buffer_cray.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/plugins/burst_buffer/cray/burst_buffer_cray.c	2017-01-31 20:56:34.000000000 +0100
@@ -1205,8 +1205,8 @@
 		num_instances = 0;	/* Redundant, but fixes CLANG bug */
 	}
 	sessions = _bb_get_sessions(&num_sessions, &bb_state, timeout);
-	slurm_mutex_lock(&bb_state.bb_mutex);
 	assoc_mgr_lock(&assoc_locks);
+	slurm_mutex_lock(&bb_state.bb_mutex);
 	bb_state.last_load_time = time(NULL);
 	for (i = 0; i < num_sessions; i++) {
 		if (!init_config) {
@@ -1251,8 +1251,8 @@
 		if (bb_alloc->job_id == 0)
 			bb_post_persist_create(NULL, bb_alloc, &bb_state);
 	}
-	assoc_mgr_unlock(&assoc_locks);
 	slurm_mutex_unlock(&bb_state.bb_mutex);
+	assoc_mgr_unlock(&assoc_locks);
 	_bb_free_sessions(sessions, num_sessions);
 	_bb_free_instances(instances, num_instances);
 
@@ -3383,6 +3383,7 @@
 
 	/* Run "paths" function, get DataWarp environment variables */
 	if (_have_dw_cmd_opts(bb_job)) {
+		/* Setup "paths" operation */
 		if (bb_state.bb_config.validate_timeout)
 			timeout = bb_state.bb_config.validate_timeout * 1000;
 		else
@@ -3422,48 +3423,52 @@
 		}
 		xfree(resp_msg);
 		_free_script_argv(script_argv);
-	}
 
-	pre_run_argv = xmalloc(sizeof(char *) * 10);
-	pre_run_argv[0] = xstrdup("dw_wlm_cli");
-	pre_run_argv[1] = xstrdup("--function");
-	pre_run_argv[2] = xstrdup("pre_run");
-	pre_run_argv[3] = xstrdup("--token");
-	xstrfmtcat(pre_run_argv[4], "%u", job_ptr->job_id);
-	pre_run_argv[5] = xstrdup("--job");
-	xstrfmtcat(pre_run_argv[6], "%s/script", job_dir);
-	if (client_nodes_file_nid) {
+		/* Setup "pre_run" operation */
+		pre_run_argv = xmalloc(sizeof(char *) * 10);
+		pre_run_argv[0] = xstrdup("dw_wlm_cli");
+		pre_run_argv[1] = xstrdup("--function");
+		pre_run_argv[2] = xstrdup("pre_run");
+		pre_run_argv[3] = xstrdup("--token");
+		xstrfmtcat(pre_run_argv[4], "%u", job_ptr->job_id);
+		pre_run_argv[5] = xstrdup("--job");
+		xstrfmtcat(pre_run_argv[6], "%s/script", job_dir);
+		if (client_nodes_file_nid) {
 #if defined(HAVE_NATIVE_CRAY)
-		pre_run_argv[7] = xstrdup("--nidlistfile");
+			pre_run_argv[7] = xstrdup("--nidlistfile");
 #else
-		pre_run_argv[7] = xstrdup("--nodehostnamefile");
+			pre_run_argv[7] = xstrdup("--nodehostnamefile");
 #endif
-		pre_run_argv[8] = xstrdup(client_nodes_file_nid);
-	}
-	pre_run_args = xmalloc(sizeof(pre_run_args_t));
-	pre_run_args->args    = pre_run_argv;
-	pre_run_args->job_id  = job_ptr->job_id;
-	pre_run_args->timeout = bb_state.bb_config.other_timeout;
-	pre_run_args->user_id = job_ptr->user_id;
-	if (job_ptr->details)	/* Prevent launch until "pre_run" completes */
-		job_ptr->details->prolog_running++;
-
-	slurm_attr_init(&pre_run_attr);
-	if (pthread_attr_setdetachstate(&pre_run_attr, PTHREAD_CREATE_DETACHED))
-		error("pthread_attr_setdetachstate error %m");
-	while (pthread_create(&pre_run_tid, &pre_run_attr, _start_pre_run,
-			      pre_run_args)) {
-		if (errno != EAGAIN) {
-			error("%s: pthread_create: %m", __func__);
-			_start_pre_run(pre_run_argv);	/* Do in-line */
-			break;
+			pre_run_argv[8] = xstrdup(client_nodes_file_nid);
 		}
-		usleep(100000);
-	}
-	slurm_attr_destroy(&pre_run_attr);
+		pre_run_args = xmalloc(sizeof(pre_run_args_t));
+		pre_run_args->args    = pre_run_argv;
+		pre_run_args->job_id  = job_ptr->job_id;
+		pre_run_args->timeout = bb_state.bb_config.other_timeout;
+		pre_run_args->user_id = job_ptr->user_id;
+		if (job_ptr->details) {	/* Defer launch until completion */
+			job_ptr->details->prolog_running++;
+			job_ptr->job_state |= JOB_CONFIGURING;
+		}
+
+		slurm_attr_init(&pre_run_attr);
+		if (pthread_attr_setdetachstate(&pre_run_attr,
+						PTHREAD_CREATE_DETACHED))
+			error("pthread_attr_setdetachstate error %m");
+		while (pthread_create(&pre_run_tid, &pre_run_attr,
+				      _start_pre_run, pre_run_args)) {
+			if (errno != EAGAIN) {
+				error("%s: pthread_create: %m", __func__);
+				_start_pre_run(pre_run_argv);	/* Do in-line */
+				break;
+			}
+			usleep(100000);
+		}
+		slurm_attr_destroy(&pre_run_attr);
+}
 
-	xfree(job_dir);
 	xfree(client_nodes_file_nid);
+	xfree(job_dir);
 	return rc;
 }
 
@@ -3472,7 +3477,6 @@
 {
 	last_job_update = time(NULL);
 	job_ptr->end_time = last_job_update;
-	job_ptr->job_state = JOB_PENDING | JOB_COMPLETING;
 	if (hold_job)
 		job_ptr->priority = 0;
 	build_cg_bitmap(job_ptr);
@@ -3480,7 +3484,11 @@
 	job_ptr->state_reason = FAIL_BURST_BUFFER_OP;
 	xfree(job_ptr->state_desc);
 	job_ptr->state_desc = xstrdup("Burst buffer pre_run error");
-	job_completion_logger(job_ptr, false);
+
+	job_ptr->job_state  = JOB_REQUEUE;
+	job_completion_logger(job_ptr, true);
+	job_ptr->job_state = JOB_PENDING | JOB_COMPLETING;
+
 	deallocate_nodes(job_ptr, false, false, false);
 }
 
@@ -4066,6 +4074,7 @@
 			error("%s: unable to find job record for job %u",
 			      __func__, create_args->job_id);
 		}
+		assoc_mgr_lock(&assoc_locks);
 		slurm_mutex_lock(&bb_state.bb_mutex);
 		_reset_buf_state(create_args->user_id, create_args->job_id,
 				 create_args->name, BB_STATE_ALLOCATED,
@@ -4074,7 +4083,6 @@
 					     create_args->user_id);
 		bb_alloc->size = create_args->size;
 		bb_alloc->pool = xstrdup(create_args->pool);
-		assoc_mgr_lock(&assoc_locks);
 		if (job_ptr) {
 			bb_alloc->account   = xstrdup(job_ptr->account);
 			if (job_ptr->assoc_ptr) {
@@ -4120,8 +4128,8 @@
 		}
 		(void) bb_post_persist_create(job_ptr, bb_alloc, &bb_state);
 		bb_state.last_update_time = time(NULL);
-		assoc_mgr_unlock(&assoc_locks);
 		slurm_mutex_unlock(&bb_state.bb_mutex);
+		assoc_mgr_unlock(&assoc_locks);
 		unlock_slurmctld(job_write_lock);
 	}
 	xfree(resp_msg);
@@ -4204,6 +4212,9 @@
 		assoc_mgr_lock_t assoc_locks =
 			{ READ_LOCK, NO_LOCK, READ_LOCK, NO_LOCK,
 			  NO_LOCK, NO_LOCK, NO_LOCK };
+		/* assoc_mgr needs locking to call bb_post_persist_delete */
+		if (bb_alloc)
+			assoc_mgr_lock(&assoc_locks);
 		slurm_mutex_lock(&bb_state.bb_mutex);
 		_reset_buf_state(destroy_args->user_id, destroy_args->job_id,
 				 destroy_args->name, BB_STATE_DELETED, 0);
@@ -4216,14 +4227,14 @@
 			bb_limit_rem(bb_alloc->user_id, bb_alloc->size,
 				     bb_alloc->pool, &bb_state);
 
-			assoc_mgr_lock(&assoc_locks);
 			(void) bb_post_persist_delete(bb_alloc, &bb_state);
-			assoc_mgr_unlock(&assoc_locks);
 
 			(void) bb_free_alloc_rec(&bb_state, bb_alloc);
 		}
 		bb_state.last_update_time = time(NULL);
 		slurm_mutex_unlock(&bb_state.bb_mutex);
+		if (bb_alloc)
+			assoc_mgr_unlock(&assoc_locks);
 	}
 	xfree(resp_msg);
 	_free_create_args(destroy_args);
diff -Nru slurm-llnl-16.05.8/src/plugins/checkpoint/blcr/checkpoint_blcr.c slurm-llnl-16.05.9/src/plugins/checkpoint/blcr/checkpoint_blcr.c
--- slurm-llnl-16.05.8/src/plugins/checkpoint/blcr/checkpoint_blcr.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/plugins/checkpoint/blcr/checkpoint_blcr.c	2017-01-31 20:56:34.000000000 +0100
@@ -422,30 +422,29 @@
 	 */
 
 	/* set LD_PRELOAD for batch script shell */
-	//if (job->batch) {
-		old_env = getenvp(job->env, "LD_PRELOAD");
-		if (old_env) {
-			/* search and replace all libcr_run and libcr_omit
-			 * the old env value is messed up --
-			 * it will be replaced */
-			while ((ptr = strtok_r(old_env, " :", &save_ptr))) {
-				old_env = NULL;
-				if (!ptr)
-					break;
-				if (!xstrncmp(ptr, "libcr_run.so", 12) ||
-				    !xstrncmp(ptr, "libcr_omit.so", 13))
-					continue;
-				xstrcat(new_env, ptr);
-				xstrcat(new_env, ":");
-			}
+	old_env = getenvp(job->env, "LD_PRELOAD");
+	if (old_env) {
+		/* search and replace all libcr_run and libcr_omit
+		 * the old env value is messed up --
+		 * it will be replaced */
+		while ((ptr = strtok_r(old_env, " :", &save_ptr))) {
+			old_env = NULL;
+			if (!ptr)
+				break;
+			if (!xstrncmp(ptr, "libcr_run.so", 12) ||
+			    !xstrncmp(ptr, "libcr_omit.so", 13))
+				continue;
+			xstrcat(new_env, ptr);
+			xstrcat(new_env, ":");
 		}
-		ptr = xstrdup("libcr_run.so");
-		if (new_env)
-			xstrfmtcat(ptr, ":%s", new_env);
-		setenvf(&job->env, "LD_PRELOAD", ptr);
-		xfree(new_env);
-		xfree(ptr);
-		//}
+	}
+	ptr = xstrdup("libcr_run.so");
+	if (new_env)
+		xstrfmtcat(ptr, ":%s", new_env);
+	setenvf(&job->env, "LD_PRELOAD", ptr);
+	xfree(new_env);
+	xfree(ptr);
+
 	return SLURM_SUCCESS;
 }
 
diff -Nru slurm-llnl-16.05.8/src/plugins/sched/backfill/backfill.c slurm-llnl-16.05.9/src/plugins/sched/backfill/backfill.c
--- slurm-llnl-16.05.8/src/plugins/sched/backfill/backfill.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/plugins/sched/backfill/backfill.c	2017-01-31 20:56:34.000000000 +0100
@@ -135,6 +135,7 @@
 static int max_backfill_job_per_user = 0;
 static int max_backfill_jobs_start = 0;
 static bool backfill_continue = false;
+static bool assoc_limit_stop = false;
 static int defer_rpc_cnt = 0;
 static int sched_timeout = SCHED_TIMEOUT;
 static int yield_sleep   = YIELD_SLEEP;
@@ -639,6 +640,13 @@
 		backfill_continue = false;
 	}
 
+	if (sched_params && (strstr(sched_params, "assoc_limit_stop"))) {
+		assoc_limit_stop = true;
+	} else {
+		assoc_limit_stop = false;
+	}
+
+
 	if (sched_params &&
 	    (tmp_ptr = strstr(sched_params, "bf_yield_interval="))) {
 		sched_timeout = atoi(tmp_ptr + 18);
@@ -1127,7 +1135,8 @@
 		}
 
 		if (!acct_policy_job_runnable_state(job_ptr) &&
-		    !acct_policy_job_runnable_pre_select(job_ptr))
+		    (!assoc_limit_stop ||
+		     !acct_policy_job_runnable_pre_select(job_ptr)))
 			continue;
 
 		job_no_reserve = 0;
@@ -1847,8 +1856,19 @@
 		power_g_job_start(job_ptr);
 		if (job_ptr->batch_flag == 0)
 			srun_allocate(job_ptr->job_id);
-		else if ((job_ptr->details == NULL) ||
-			 (job_ptr->details->prolog_running == 0))
+		else if (
+#ifdef HAVE_BG
+				/* On a bluegene system we need to run the
+				 * prolog while the job is CONFIGURING so this
+				 * can't work off the CONFIGURING flag as done
+				 * elsewhere.
+				 */
+			!job_ptr->details ||
+			!job_ptr->details->prolog_running
+#else
+			!IS_JOB_CONFIGURING(job_ptr)
+#endif
+			)
 			launch_job(job_ptr);
 		slurmctld_diag_stats.backfilled_jobs++;
 		slurmctld_diag_stats.last_backfilled_jobs++;
diff -Nru slurm-llnl-16.05.8/src/plugins/select/cons_res/select_cons_res.c slurm-llnl-16.05.9/src/plugins/select/cons_res/select_cons_res.c
--- slurm-llnl-16.05.8/src/plugins/select/cons_res/select_cons_res.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/plugins/select/cons_res/select_cons_res.c	2017-01-31 20:56:34.000000000 +0100
@@ -544,21 +544,24 @@
 /* sort the rows of a partition from "most allocated" to "least allocated" */
 extern void cr_sort_part_rows(struct part_res_record *p_ptr)
 {
-	uint32_t i, j, a, b;
+	uint32_t i, j, b;
+	uint32_t a[p_ptr->num_rows];
 
 	if (!p_ptr->row)
 		return;
 
 	for (i = 0; i < p_ptr->num_rows; i++) {
 		if (p_ptr->row[i].row_bitmap)
-			a = bit_set_count(p_ptr->row[i].row_bitmap);
+			a[i] = bit_set_count(p_ptr->row[i].row_bitmap);
 		else
-			a = 0;
+			a[i] = 0;
+	}
+	for (i = 0; i < p_ptr->num_rows; i++) {
 		for (j = i+1; j < p_ptr->num_rows; j++) {
-			if (!p_ptr->row[j].row_bitmap)
-				continue;
-			b = bit_set_count(p_ptr->row[j].row_bitmap);
-			if (b > a) {
+			if (a[j] > a[i]) {
+				b = a[j];
+				a[j] = a[i];
+				a[i] = b;
 				_swap_rows(&(p_ptr->row[i]), &(p_ptr->row[j]));
 			}
 		}
@@ -1878,9 +1881,7 @@
 	    ((job_ptr->bit_flags & TEST_NOW_ONLY) == 0)) {
 		int time_window = 30;
 		bool more_jobs = true;
-		bool timed_out = false;
 		DEF_TIMERS;
-
 		list_sort(cr_job_list, _cr_job_list_sort);
 		START_TIMER;
 		job_iterator = list_iterator_create(cr_job_list);
@@ -1908,14 +1909,6 @@
 				last_job_ptr = tmp_job_ptr;
 				_rm_job_from_res(future_part, future_usage,
 						 tmp_job_ptr, 0);
-				if (timed_out) {
-					/* After timeout, remove ALL remaining
-					 * jobs and test if the pending job can
-					 * start, rather than executing the slow
-					 * cr_job_test() operation after
-					 * removing every 200 jobs */
-					continue;
-				}
 				if (rm_job_cnt++ > 200)
 					break;
 				next_job_ptr = list_peek_next(job_iterator);
@@ -1949,12 +1942,9 @@
 				}
 				break;
 			}
-			/* After 1 second of iterating over groups of running
-			 * jobs, simulate the termination of all remaining jobs
-			 * in order to determine if pending job can ever run */
 			END_TIMER;
-			if (DELTA_TIMER >= 1000000)
-				timed_out = true;
+			if (DELTA_TIMER >= 2000000)
+				break;	/* Quit after 2 seconds wall time */
 		}
 		list_iterator_destroy(job_iterator);
 	}
diff -Nru slurm-llnl-16.05.8/src/plugins/task/cgroup/task_cgroup_memory.c slurm-llnl-16.05.9/src/plugins/task/cgroup/task_cgroup_memory.c
--- slurm-llnl-16.05.8/src/plugins/task/cgroup/task_cgroup_memory.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/plugins/task/cgroup/task_cgroup_memory.c	2017-01-31 20:56:34.000000000 +0100
@@ -458,6 +458,7 @@
 		goto error;
 	}
 
+	fstatus = SLURM_SUCCESS;
 error:
 	xcgroup_unlock(&memory_cg);
 	xcgroup_destroy(&memory_cg);
diff -Nru slurm-llnl-16.05.8/src/sacctmgr/common.c slurm-llnl-16.05.9/src/sacctmgr/common.c
--- slurm-llnl-16.05.8/src/sacctmgr/common.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/sacctmgr/common.c	2017-01-31 20:56:34.000000000 +0100
@@ -405,8 +405,10 @@
 		field->name = xstrdup("MaxCPUsPU");
 		field->len = 9;
 		field->print_routine = print_fields_uint;
-	} else if (!strncasecmp("MaxTRESPerJob",
-				object, MAX(command_len, 7))) {
+	} else if (!strncasecmp("MaxTRES",
+				object, MAX(command_len, 7)) ||
+		   !strncasecmp("MaxTRESPerJob",
+				object, MAX(command_len, 11))) {
 		field->type = PRINT_MAXT;
 		field->name = xstrdup("MaxTRES");
 		field->len = 13;
@@ -452,7 +454,9 @@
 		field->len = 13;
 		field->print_routine = sacctmgr_print_tres;
 	} else if (!strncasecmp("MaxTRESPerUser", object,
-				MAX(command_len, 11))) {
+				MAX(command_len, 11)) ||
+		   !strncasecmp("MaxTRESPU", object,
+				MAX(command_len, 9))) {
 		field->type = PRINT_MAXTU;
 		field->name = xstrdup("MaxTRESPU");
 		field->len = 13;
@@ -473,9 +477,9 @@
 		field->len = 9;
 		field->print_routine = print_fields_uint;
 	} else if (!strncasecmp("MaxJobsPerUser", object,
-				MAX(command_len, 8)) ||
+				MAX(command_len, 11)) ||
 		   !strncasecmp("MaxJobsPU", object,
-				MAX(command_len, 8))) {
+				MAX(command_len, 9))) {
 		field->type = PRINT_MAXJ; /* used same as MaxJobs */
 		field->name = xstrdup("MaxJobsPU");
 		field->len = 9;
diff -Nru slurm-llnl-16.05.8/src/sbcast/opts.c slurm-llnl-16.05.9/src/sbcast/opts.c
--- slurm-llnl-16.05.8/src/sbcast/opts.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/sbcast/opts.c	2017-01-31 20:56:34.000000000 +0100
@@ -94,7 +94,7 @@
 		{NULL,        0,                 0, 0}
 	};
 
-	if (getenv("SBCAST_COMPRESS"))
+	if ((env_val = getenv("SBCAST_COMPRESS")))
 		params.compress = parse_compress_type(env_val);
 	if ( ( env_val = getenv("SBCAST_FANOUT") ) )
 		params.fanout = atoi(env_val);
diff -Nru slurm-llnl-16.05.8/src/slurmctld/acct_policy.c slurm-llnl-16.05.9/src/slurmctld/acct_policy.c
--- slurm-llnl-16.05.8/src/slurmctld/acct_policy.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/slurmctld/acct_policy.c	2017-01-31 20:56:34.000000000 +0100
@@ -1765,7 +1765,7 @@
 		       qos_ptr->name,
 		       assoc_mgr_tres_name_array[tres_pos],
 		       qos_ptr->grp_tres_mins_ctld[tres_pos],
-		       tres_usage_mins[i]);
+		       tres_usage_mins[tres_pos]);
 		rc = false;
 		goto end_it;
 		break;
@@ -2741,7 +2741,7 @@
 			 * parent or not
 			 */
 	assoc_mgr_lock_t locks = { READ_LOCK, NO_LOCK, READ_LOCK, NO_LOCK,
-				   NO_LOCK, NO_LOCK, NO_LOCK };
+				   READ_LOCK, NO_LOCK, NO_LOCK };
 
 	/* check to see if we are enforcing associations */
 	if (!accounting_enforce)
diff -Nru slurm-llnl-16.05.8/src/slurmctld/agent.c slurm-llnl-16.05.9/src/slurmctld/agent.c
--- slurm-llnl-16.05.8/src/slurmctld/agent.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/slurmctld/agent.c	2017-01-31 20:56:34.000000000 +0100
@@ -174,7 +174,12 @@
 	char *message;
 } mail_info_t;
 
-static void _sig_handler(int dummy);
+typedef struct retry_args {
+	bool mail_too;			/* Time to wait between retries */
+	int min_wait;			/* Send pending email too */
+} retry_args_t;
+
+static void *_agent_retry(void *arg);
 static int  _batch_launch_defer(queued_request_t *queued_req_ptr);
 static inline int _comm_err(char *node_name, slurm_msg_type_t msg_type);
 static void _list_delete_retry(void *retry_entry);
@@ -185,8 +190,9 @@
 		int no_resp_cnt, int retry_cnt);
 static void _purge_agent_args(agent_arg_t *agent_arg_ptr);
 static void _queue_agent_retry(agent_info_t * agent_info_ptr, int count);
-static int _setup_requeue(agent_arg_t *agent_arg_ptr, thd_t *thread_ptr,
-			  int *count, int *spot);
+static int  _setup_requeue(agent_arg_t *agent_arg_ptr, thd_t *thread_ptr,
+			   int *count, int *spot);
+static void _sig_handler(int dummy);
 static void _spawn_retry_agent(agent_arg_t * agent_arg_ptr);
 static void *_thread_per_group_rpc(void *args);
 static int   _valid_agent_arg(agent_arg_t *agent_arg_ptr);
@@ -1261,17 +1267,41 @@
 }
 
 /*
- * agent_retry - Agent for retrying pending RPCs. One pending request is
+ * agent_retry - Spawn agent for retrying pending RPCs. One pending request is
  *	issued if it has been pending for at least min_wait seconds
  * IN min_wait - Minimum wait time between re-issue of a pending RPC
  * IN mail_too - Send pending email too, note this performed using a
  *	fork/waitpid, so it can take longer than just creating a pthread
  *	to send RPCs
- * RET count of queued requests remaining
  */
-extern int agent_retry (int min_wait, bool mail_too)
+extern void agent_retry(int min_wait, bool mail_too)
 {
-	int list_size = 0, rc;
+	pthread_attr_t thread_attr;
+	pthread_t thread_id = (pthread_t) 0;
+	retry_args_t *retry_args_ptr;
+
+	retry_args_ptr = xmalloc(sizeof(struct retry_args));
+	retry_args_ptr->mail_too = mail_too;
+	retry_args_ptr->min_wait = min_wait;
+
+	slurm_attr_init(&thread_attr);
+	if (pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_DETACHED))
+		error("pthread_attr_setdetachstate error %m");
+	if (pthread_create(&thread_id, &thread_attr, _agent_retry,
+			   (void *) retry_args_ptr)) {
+		error("pthread_create error %m");
+		xfree(retry_args_ptr);
+	}
+	slurm_attr_destroy(&thread_attr);
+}
+
+/* Do the work requested by agent_retry (retry pending RPCs).
+ * This is a separate thread so the job records can be locked */
+static void *_agent_retry(void *arg)
+{
+	retry_args_t *retry_args_ptr = (retry_args_t *) arg;
+	bool mail_too;
+	int min_wait, rc;
 	time_t now = time(NULL);
 	queued_request_t *queued_req_ptr = NULL;
 	agent_arg_t *agent_arg_ptr = NULL;
@@ -1279,17 +1309,26 @@
 	pthread_t thread_mail = 0;
 	pthread_attr_t attr_mail;
 	mail_info_t *mi = NULL;
+	/* Write lock on jobs */
+	slurmctld_lock_t job_write_lock =
+		{ NO_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK };
+
+	mail_too = retry_args_ptr->mail_too;
+	min_wait = retry_args_ptr->min_wait;
+	xfree(arg);
 
+	lock_slurmctld(job_write_lock);
 	slurm_mutex_lock(&retry_mutex);
 	if (retry_list) {
 		static time_t last_msg_time = (time_t) 0;
-		uint32_t msg_type[5] = {0, 0, 0, 0, 0}, i = 0;
+		uint32_t msg_type[5] = {0, 0, 0, 0, 0};
+		int i = 0, list_size;
 		list_size = list_count(retry_list);
 		if ((list_size > 100) &&
 		    (difftime(now, last_msg_time) > 300)) {
 			/* Note sizable backlog of work */
 			info("slurmctld: agent retry_list size is %d",
-				list_size);
+			     list_size);
 			retry_iter = list_iterator_create(retry_list);
 			while ((queued_req_ptr = (queued_request_t *)
 					list_next(retry_iter))) {
@@ -1311,13 +1350,13 @@
 		/* too much work already */
 		slurm_mutex_unlock(&agent_cnt_mutex);
 		slurm_mutex_unlock(&retry_mutex);
-		return list_size;
+		unlock_slurmctld(job_write_lock);
+		return NULL;
 	}
 	slurm_mutex_unlock(&agent_cnt_mutex);
 
 	if (retry_list) {
 		/* first try to find a new (never tried) record */
-
 		retry_iter = list_iterator_create(retry_list);
 		while ((queued_req_ptr = (queued_request_t *)
 				list_next(retry_iter))) {
@@ -1327,14 +1366,12 @@
 						  agent_arg_ptr);
 				xfree(queued_req_ptr);
 				list_remove(retry_iter);
-				list_size--;
 				continue;
 			}
 			if (rc > 0)
 				continue;
  			if (queued_req_ptr->last_attempt == 0) {
 				list_remove(retry_iter);
-				list_size--;
 				break;
 			}
 		}
@@ -1356,7 +1393,6 @@
 						  agent_arg_ptr);
 				xfree(queued_req_ptr);
 				list_remove(retry_iter);
-				list_size--;
 				continue;
 			}
 			if (rc > 0)
@@ -1364,13 +1400,13 @@
 			age = difftime(now, queued_req_ptr->last_attempt);
 			if (age > min_wait) {
 				list_remove(retry_iter);
-				list_size--;
 				break;
 			}
 		}
 		list_iterator_destroy(retry_iter);
 	}
 	slurm_mutex_unlock(&retry_mutex);
+	unlock_slurmctld(job_write_lock);
 
 	if (queued_req_ptr) {
 		agent_arg_ptr = queued_req_ptr->agent_arg_ptr;
@@ -1406,7 +1442,7 @@
 		slurm_mutex_unlock(&agent_cnt_mutex);
 	}
 
-	return list_size;
+	return NULL;
 }
 
 /*
@@ -1823,7 +1859,7 @@
 	agent_arg_t *agent_arg_ptr;
 	batch_job_launch_msg_t *launch_msg_ptr;
 	time_t now = time(NULL);
-	struct job_record  *job_ptr;
+	struct job_record *job_ptr;
 	int nodes_ready = 0, tmp = 0;
 
 	agent_arg_ptr = queued_req_ptr->agent_arg_ptr;
@@ -1845,6 +1881,9 @@
 		return -1;	/* job cancelled while waiting */
 	}
 
+	if (job_ptr->details && job_ptr->details->prolog_running)
+		return 1;
+
 	if (job_ptr->wait_all_nodes) {
 		(void) job_node_ready(launch_msg_ptr->job_id, &tmp);
 		if (tmp == (READY_JOB_STATE | READY_NODE_STATE)) {
@@ -1853,9 +1892,6 @@
 			    !xstrcmp(launch_msg_ptr->alias_list, "TBD")) {
 				/* Update launch RPC with correct node
 				 * aliases */
-				struct job_record *job_ptr;
-				job_ptr = find_job_record(launch_msg_ptr->
-							  job_id);
 				xfree(launch_msg_ptr->alias_list);
 				launch_msg_ptr->alias_list = xstrdup(job_ptr->
 								     alias_list);
@@ -1887,7 +1923,8 @@
 	}
 
 	if (nodes_ready) {
-		job_config_fini(job_ptr);
+		if (IS_JOB_CONFIGURING(job_ptr))
+			job_config_fini(job_ptr);
 		queued_req_ptr->last_attempt = (time_t) 0;
 		return 0;
 	}
diff -Nru slurm-llnl-16.05.8/src/slurmctld/agent.h slurm-llnl-16.05.9/src/slurmctld/agent.h
--- slurm-llnl-16.05.8/src/slurmctld/agent.h	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/slurmctld/agent.h	2017-01-31 20:56:34.000000000 +0100
@@ -85,11 +85,10 @@
  *	issued if it has been pending for at least min_wait seconds
  * IN min_wait - Minimum wait time between re-issue of a pending RPC
  * IN mail_too - Send pending email too, note this performed using a
- *		fork/waitpid, so it can take longer than just creating
- *		a pthread to send RPCs
- * RET count of queued requests remaining
+ *	fork/waitpid, so it can take longer than just creating a pthread
+ *	to send RPCs
  */
-extern int agent_retry (int min_wait, bool mail_too);
+extern void agent_retry(int min_wait, bool mail_too);
 
 /* agent_purge - purge all pending RPC requests */
 extern void agent_purge (void);
diff -Nru slurm-llnl-16.05.8/src/slurmctld/job_mgr.c slurm-llnl-16.05.9/src/slurmctld/job_mgr.c
--- slurm-llnl-16.05.8/src/slurmctld/job_mgr.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/slurmctld/job_mgr.c	2017-01-31 20:56:34.000000000 +0100
@@ -7470,6 +7470,7 @@
 {
 	time_t now = time(NULL);
 
+	last_job_update = now;
 	job_ptr->job_state &= (~JOB_CONFIGURING);
 	job_ptr->tot_sus_time = difftime(now, job_ptr->start_time);
 	if ((job_ptr->time_limit != INFINITE) && (job_ptr->tot_sus_time != 0)) {
@@ -7486,9 +7487,20 @@
 	if (bit_overlap(job_ptr->node_bitmap, power_node_bitmap))
 		return false;
 
-	if (job_ptr->wait_all_nodes && 
-	    ((select_g_job_ready(job_ptr) & READY_NODE_STATE) == 0))
-		return false;
+	if (job_ptr->wait_all_nodes) {
+		/* Make sure all nodes ready to start job */
+		if ((select_g_job_ready(job_ptr) & READY_NODE_STATE) == 0)
+			return false;
+	} else if (job_ptr->batch_flag) {
+		/* Make first node is ready to start batch job */
+		int i_first = bit_ffs(job_ptr->node_bitmap);
+		struct node_record *node_ptr = node_record_table_ptr + i_first;
+		if ((i_first != -1) &&
+		    (IS_NODE_POWER_SAVE(node_ptr) ||
+		     IS_NODE_POWER_UP(node_ptr))) {
+			return false;
+		}
+	}
 
 	return true;
 }
@@ -12846,6 +12858,7 @@
 	if (job_ptr->alias_list && !xstrcmp(job_ptr->alias_list, "TBD") &&
 	    (prolog == 0) && job_ptr->node_bitmap &&
 	    (bit_overlap(power_node_bitmap, job_ptr->node_bitmap) == 0)) {
+		last_job_update = time(NULL);
 		job_ptr->job_state &= (~JOB_CONFIGURING);
 		set_job_alias_list(job_ptr);
 	}
@@ -14373,6 +14386,8 @@
 	 * to add it again. */
 	acct_policy_add_job_submit(job_ptr);
 
+	acct_policy_update_pending_job(job_ptr);
+
 	if (state & JOB_SPECIAL_EXIT) {
 		job_ptr->job_state |= JOB_SPECIAL_EXIT;
 		job_ptr->state_reason = WAIT_HELD_USER;
diff -Nru slurm-llnl-16.05.8/src/slurmctld/job_scheduler.c slurm-llnl-16.05.9/src/slurmctld/job_scheduler.c
--- slurm-llnl-16.05.8/src/slurmctld/job_scheduler.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/slurmctld/job_scheduler.c	2017-01-31 20:56:34.000000000 +0100
@@ -574,6 +574,7 @@
 	ListIterator job_iterator;
 	slurmctld_lock_t job_write_lock =
 		{ READ_LOCK, WRITE_LOCK, WRITE_LOCK, READ_LOCK };
+	time_t now = time(NULL);
 #ifdef HAVE_BG
 	static uint16_t cpus_per_node = 0;
 	if (!cpus_per_node)
@@ -591,7 +592,8 @@
 			continue;
 		if (part_ptr == NULL)
 			continue;
-		if ((job_ptr->details == NULL) || job_ptr->details->begin_time)
+		if ((job_ptr->details == NULL) ||
+		    (job_ptr->details->begin_time > now))
 			continue;
 		if ((part_ptr->state_up & PARTITION_SCHED) == 0)
 			continue;
@@ -863,8 +865,20 @@
 			info("sched: Allocate JobId=%u Partition=%s NodeList=%s #CPUs=%u",
 			     job_ptr->job_id, job_ptr->part_ptr->name,
 			     job_ptr->nodes, job_ptr->total_cpus);
-			if ((job_ptr->details->prolog_running == 0) &&
-			    ((job_ptr->bit_flags & NODE_REBOOT) == 0)) {
+
+			if (
+#ifdef HAVE_BG
+				/* On a bluegene system we need to run the
+				 * prolog while the job is CONFIGURING so this
+				 * can't work off the CONFIGURING flag as done
+				 * elsewhere.
+				 */
+				!job_ptr->details->prolog_running &&
+				!(job_ptr->bit_flags & NODE_REBOOT)
+#else
+				!IS_JOB_CONFIGURING(job_ptr)
+#endif
+				) {
 				launch_msg = build_launch_job_msg(job_ptr,
 							msg->protocol_version);
 			}
@@ -1842,10 +1856,20 @@
 #endif
 			if (job_ptr->batch_flag == 0)
 				srun_allocate(job_ptr->job_id);
-			else if ((job_ptr->details->prolog_running == 0) &&
-			         ((job_ptr->bit_flags & NODE_REBOOT) == 0)) {
+			else if (
+#ifdef HAVE_BG
+				/* On a bluegene system we need to run the
+				 * prolog while the job is CONFIGURING so this
+				 * can't work off the CONFIGURING flag as done
+				 * elsewhere.
+				 */
+				!job_ptr->details->prolog_running &&
+				!(job_ptr->bit_flags & NODE_REBOOT)
+#else
+				!IS_JOB_CONFIGURING(job_ptr)
+#endif
+				)
 				launch_job(job_ptr);
-			}
 			rebuild_job_part_list(job_ptr);
 			job_cnt++;
 			if (is_job_array_head &&
@@ -3181,7 +3205,8 @@
 	}
 
 	/* Enforce reservation: access control, time and nodes */
-	if (job_ptr->details->begin_time)
+	if (job_ptr->details->begin_time &&
+	    (job_ptr->details->begin_time > now))
 		start_res = job_ptr->details->begin_time;
 	else
 		start_res = now;
@@ -3753,10 +3778,10 @@
 		return errno;
 	}
 
-	if (job_ptr->details)
+	if (job_ptr->details) {
 		job_ptr->details->prolog_running++;
-
-	job_ptr->job_state |= JOB_CONFIGURING;
+		job_ptr->job_state |= JOB_CONFIGURING;
+	}
 
 	slurm_attr_init(&thread_attr_prolog);
 	pthread_attr_setdetachstate(&thread_attr_prolog,
diff -Nru slurm-llnl-16.05.8/src/slurmctld/node_mgr.c slurm-llnl-16.05.9/src/slurmctld/node_mgr.c
--- slurm-llnl-16.05.8/src/slurmctld/node_mgr.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/slurmctld/node_mgr.c	2017-01-31 20:56:34.000000000 +0100
@@ -3346,9 +3346,11 @@
 	char *host_str = NULL;
 	hostlist_t no_resp_hostlist = NULL;
 
-	for (i=0; i<node_record_count; i++) {
+	for (i = 0; i < node_record_count; i++) {
 		node_ptr = &node_record_table_ptr[i];
-		if (!node_ptr->not_responding)
+		if (!node_ptr->not_responding ||
+		    IS_NODE_POWER_SAVE(node_ptr) ||
+		    IS_NODE_POWER_UP(node_ptr))
 			continue;
 		if (no_resp_hostlist) {
 			(void) hostlist_push_host(no_resp_hostlist,
diff -Nru slurm-llnl-16.05.8/src/slurmctld/proc_req.c slurm-llnl-16.05.9/src/slurmctld/proc_req.c
--- slurm-llnl-16.05.8/src/slurmctld/proc_req.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/slurmctld/proc_req.c	2017-01-31 20:56:34.000000000 +0100
@@ -3381,8 +3381,20 @@
 				_throttle_fini(&active_rpc_cnt);
 				goto fini;
 			}
-			if (job_ptr->details &&
-			    job_ptr->details->prolog_running) {
+
+			if (
+#ifdef HAVE_BG
+				/* On a bluegene system we need to run the
+				 * prolog while the job is CONFIGURING so this
+				 * can't work off the CONFIGURING flag as done
+				 * elsewhere.
+				 */
+				job_ptr->details &&
+				job_ptr->details->prolog_running
+#else
+				IS_JOB_CONFIGURING(job_ptr)
+#endif
+				) {
 				slurm_send_rc_msg(msg, EAGAIN);
 				unlock_slurmctld(job_write_lock);
 				_throttle_fini(&active_rpc_cnt);
diff -Nru slurm-llnl-16.05.8/src/slurmctld/reservation.c slurm-llnl-16.05.9/src/slurmctld/reservation.c
--- slurm-llnl-16.05.8/src/slurmctld/reservation.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/slurmctld/reservation.c	2017-01-31 20:56:34.000000000 +0100
@@ -5400,7 +5400,6 @@
 		}
 		_advance_resv_time(resv_ptr);
 		if ((resv_ptr->job_run_cnt    == 0) &&
-		    (resv_ptr->flags_set_node == false) &&
 		    ((resv_ptr->flags & RESERVE_FLAG_DAILY ) == 0) &&
 		    ((resv_ptr->flags & RESERVE_FLAG_WEEKLY) == 0)) {
 			if (resv_ptr->job_pend_cnt) {
diff -Nru slurm-llnl-16.05.8/src/slurmctld/step_mgr.c slurm-llnl-16.05.9/src/slurmctld/step_mgr.c
--- slurm-llnl-16.05.8/src/slurmctld/step_mgr.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/slurmctld/step_mgr.c	2017-01-31 20:56:34.000000000 +0100
@@ -981,11 +981,12 @@
 				return NULL;
 			}
 		}
-		if (job_ptr->details
-		    && job_ptr->details->prolog_running == 0) {
+		if (IS_JOB_CONFIGURING(job_ptr)) {
 			info("%s: Configuration for job %u is complete",
 			      __func__, job_ptr->job_id);
 			job_config_fini(job_ptr);
+			if (job_ptr->bit_flags & NODE_REBOOT)
+				job_validate_mem(job_ptr);
 		}
 	}
 
diff -Nru slurm-llnl-16.05.8/src/slurmd/common/xcgroup.c slurm-llnl-16.05.9/src/slurmd/common/xcgroup.c
--- slurm-llnl-16.05.8/src/slurmd/common/xcgroup.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/slurmd/common/xcgroup.c	2017-01-31 20:56:34.000000000 +0100
@@ -449,7 +449,6 @@
 	char* file_path;
 	uid_t uid;
 	gid_t gid;
-	int create_only;
 	uint32_t notify;
 
 	/* init variables based on input cgroup */
@@ -457,7 +456,6 @@
 	file_path = cg->path;
 	uid = cg->uid;
 	gid = cg->gid;
-	create_only = 0;
 	notify = cg->notify;
 
 	/* save current mask and apply working one */
@@ -465,20 +463,23 @@
 	omask = umask(cmask);
 
 	/* build cgroup */
- 	if (mkdir(file_path, 0755)) {
-		if (create_only || errno != EEXIST) {
-			debug2("%s: unable to create cgroup '%s' : %m",
-			       __func__, file_path);
+	if (mkdir(file_path, 0755)) {
+		if (errno != EEXIST) {
+			error("%s: unable to create cgroup '%s' : %m",
+			      __func__, file_path);
 			umask(omask);
 			return fstatus;
+		} else {
+			debug("%s: cgroup '%s' already exists",
+			      __func__, file_path);
 		}
 	}
 	umask(omask);
 
 	/* change cgroup ownership as requested */
 	if (chown(file_path, uid, gid)) {
-		debug2("%s: unable to chown %d:%d cgroup '%s' : %m",
-		       __func__, uid, gid, file_path);
+		error("%s: unable to chown %d:%d cgroup '%s' : %m",
+		      __func__, uid, gid, file_path);
 		return fstatus;
 	}
 
diff -Nru slurm-llnl-16.05.8/src/slurmd/slurmstepd/req.c slurm-llnl-16.05.9/src/slurmd/slurmstepd/req.c
--- slurm-llnl-16.05.8/src/slurmd/slurmstepd/req.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/slurmd/slurmstepd/req.c	2017-01-31 20:56:34.000000000 +0100
@@ -128,6 +128,7 @@
 static pthread_mutex_t message_lock = PTHREAD_MUTEX_INITIALIZER;
 static pthread_cond_t message_cond = PTHREAD_COND_INITIALIZER;
 static int message_connections;
+static int msg_target_node_id = 0;
 
 /*
  *  Returns true if "uid" is a "slurm authorized user" - i.e. uid == 0
@@ -739,8 +740,6 @@
 	int errnum = 0;
 	int sig;
 	static int msg_sent = 0;
-	char *ptr = NULL;
-	int target_node_id = 0;
 	stepd_step_task_info_t *task;
 	uint32_t i;
 	uint32_t flag;
@@ -792,11 +791,8 @@
 		}
 	}
 
-	ptr = getenvp(job->env, "SLURM_STEP_KILLED_MSG_NODE_ID");
-	if (ptr)
-		target_node_id = atoi(ptr);
 	if ((job->stepid != SLURM_EXTERN_CONT) &&
-	    (job->nodeid == target_node_id) && (msg_sent == 0) &&
+	    (job->nodeid == msg_target_node_id) && (msg_sent == 0) &&
 	    (job->state < SLURMSTEPD_STEP_ENDING)) {
 		time_t now = time(NULL);
 		char entity[24], time_str[24];
@@ -1818,3 +1814,10 @@
 		}
 	}
 }
+
+extern void set_msg_node_id(stepd_step_rec_t *job)
+{
+	char *ptr = getenvp(job->env, "SLURM_STEP_KILLED_MSG_NODE_ID");
+	if (ptr)
+		msg_target_node_id = atoi(ptr);
+}
diff -Nru slurm-llnl-16.05.8/src/slurmd/slurmstepd/req.h slurm-llnl-16.05.9/src/slurmd/slurmstepd/req.h
--- slurm-llnl-16.05.8/src/slurmd/slurmstepd/req.h	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/slurmd/slurmstepd/req.h	2017-01-31 20:56:34.000000000 +0100
@@ -46,4 +46,6 @@
 /* Delay until a job is resumed */
 extern void wait_for_resumed(uint16_t msg_type);
 
+extern void set_msg_node_id(stepd_step_rec_t *job);
+
 #endif /* _STEP_REQ_H */
diff -Nru slurm-llnl-16.05.8/src/slurmd/slurmstepd/slurmstepd.c slurm-llnl-16.05.9/src/slurmd/slurmstepd/slurmstepd.c
--- slurm-llnl-16.05.8/src/slurmd/slurmstepd/slurmstepd.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/slurmd/slurmstepd/slurmstepd.c	2017-01-31 20:56:34.000000000 +0100
@@ -581,6 +581,8 @@
 	env_array_overwrite(&job->env,"SLURM_TOPOLOGY_ADDR_PATTERN",
 			    conf->node_topo_pattern);
 
+	set_msg_node_id(job);
+
 	return job;
 }
 
diff -Nru slurm-llnl-16.05.8/src/srun/libsrun/allocate.c slurm-llnl-16.05.9/src/srun/libsrun/allocate.c
--- slurm-llnl-16.05.8/src/srun/libsrun/allocate.c	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/src/srun/libsrun/allocate.c	2017-01-31 20:56:34.000000000 +0100
@@ -877,6 +877,7 @@
 		j->power_flags = opt.power_flags;
 	if (opt.mcs_label)
 		j->mcs_label = opt.mcs_label;
+	j->wait_all_nodes = 1;
 
 	return j;
 }
diff -Nru slurm-llnl-16.05.8/testsuite/expect/test10.13 slurm-llnl-16.05.9/testsuite/expect/test10.13
--- slurm-llnl-16.05.8/testsuite/expect/test10.13	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/testsuite/expect/test10.13	2017-01-31 20:56:34.000000000 +0100
@@ -44,6 +44,11 @@
 
 print_header $test_id
 
+if { [test_bluegene] == 0 } {
+	send_user "\nWARNING: This test is only compatible with bluegene systems\n"
+	exit 0
+}
+
 if {[file exists $smap] == 0} {
 	send_user "\nWARNING: smap not installed\n"
 	exit 0
diff -Nru slurm-llnl-16.05.8/testsuite/expect/test10.5 slurm-llnl-16.05.9/testsuite/expect/test10.5
--- slurm-llnl-16.05.8/testsuite/expect/test10.5	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/testsuite/expect/test10.5	2017-01-31 20:56:34.000000000 +0100
@@ -42,6 +42,11 @@
 
 print_header $test_id
 
+if { [test_bluegene] == 0 } {
+	send_user "\nWARNING: This test is only compatible with bluegene systems\n"
+	exit 0
+}
+
 if {[file exists $smap] == 0} {
 	send_user "\nWARNING: smap not installed\n"
 	exit 0
diff -Nru slurm-llnl-16.05.8/testsuite/expect/test1.14 slurm-llnl-16.05.9/testsuite/expect/test1.14
--- slurm-llnl-16.05.8/testsuite/expect/test1.14	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/testsuite/expect/test1.14	2017-01-31 20:56:34.000000000 +0100
@@ -39,7 +39,6 @@
 set file_out        "test$test_id.output"
 set file_out2       "test$test_id.output2"
 set job_id           0
-set sleep_secs       10
 
 
 print_header $test_id
@@ -64,10 +63,15 @@
 exec $bin_rm -f $file_in $file_in2 $file_out $file_out2
 make_bash_script $file_in "
   echo tasks_per_node=\$SLURM_TASKS_PER_NODE
+  if \[ \$SLURM_TASKS_PER_NODE -gt 32 \]; then
+    sleep_secs=45
+  else
+    sleep_secs=10
+  fi
   inx=0
   while \[ \$inx -lt \$SLURM_TASKS_PER_NODE \]
   do
-    $srun --exclusive -n1 $bin_sleep $sleep_secs &
+    $srun --exclusive -n1 $bin_sleep \$sleep_secs &
     inx=\$((inx+1))
   done
   $bin_sleep 4
@@ -81,7 +85,7 @@
 #
 # Spawn a job via sbatch
 #
-spawn $sbatch -N1 -t1 --gres=craynetwork:0 --output=$file_out $file_in
+spawn $sbatch -N1 -t2 --gres=craynetwork:0 --output=$file_out $file_in
 expect {
 	-re "Submitted batch job ($number)" {
 		set job_id $expect_out(1,string)
@@ -149,25 +153,30 @@
 # Delete left-over input script
 # Build another input script file
 # Run one more step than allocated CPUs with immediate option and make aborts
-# The "sleep 4" is meant to insure the earlier job steps start first
+# The "sleep" is meant to insure the earlier job steps start first
 #
 exec $bin_rm -f $file_in $file_out
 make_bash_script $file_in "
   inx=0
+  if \[ \$SLURM_TASKS_PER_NODE -gt 32 \]; then
+    sleep_secs=45
+  else
+    sleep_secs=10
+  fi
   while \[ \$inx -lt \$SLURM_TASKS_PER_NODE \]
   do
-    $srun --exclusive -n1 $bin_sleep $sleep_secs &
+    $srun --exclusive -n1 --mem=0 $bin_sleep \$sleep_secs &
     inx=\$((inx+1))
   done
   $bin_sleep 4
-  $srun -v --exclusive -n1 --immediate $file_in2 &
+  $srun -v --exclusive -n1 --mem=0 --immediate $file_in2 &
   wait
 "
 
 #
 # Spawn a job via sbatch
 #
-spawn $sbatch -N1 -t1 --gres=craynetwork:0 --output=$file_out2 $file_in
+spawn $sbatch -N1 -t2 --gres=craynetwork:0 --output=$file_out2 $file_in
 expect {
 	-re "Submitted batch job ($number)" {
 		set job_id $expect_out(1,string)
diff -Nru slurm-llnl-16.05.8/testsuite/expect/test12.2 slurm-llnl-16.05.9/testsuite/expect/test12.2
--- slurm-llnl-16.05.8/testsuite/expect/test12.2	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/testsuite/expect/test12.2	2017-01-31 20:56:34.000000000 +0100
@@ -186,7 +186,7 @@
 	# Compute error in MB
 	set diff_io [expr $max_disk_write - $max_disk_read]
 	set error_io [expr abs($diff_io)]
-	if { $error_io > 0.05 } {
+	if { $error_io > 0.3 } {
 		send_user "\nFAILURE: written file size does not match read size "
 		send_user "file_size:$mb_file_size MB "
 		send_user "max_disk_write:$max_disk_write MB "
@@ -196,7 +196,7 @@
 
 	set diff_io [expr $ave_disk_write - $ave_disk_read]
 	set error_io [expr abs($diff_io)] 
-	if { $error_io > 0.05 } {
+	if { $error_io > 0.3 } {
 		send_user "\nFAILURE: average written file size "
 		send_user "does not match average read size "
 		send_user "file_size:$mb_file_size MB "
diff -Nru slurm-llnl-16.05.8/testsuite/expect/test14.10 slurm-llnl-16.05.9/testsuite/expect/test14.10
--- slurm-llnl-16.05.8/testsuite/expect/test14.10	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/testsuite/expect/test14.10	2017-01-31 20:56:34.000000000 +0100
@@ -91,10 +91,10 @@
 $srun -N1 -n1 -w$node2 ./$file_in2
 
 echo -n \"Checking node 1: \"
-$srun -N1 -n1 -w$node2 ls /tmp/$node2/test$test_id\_file
+$srun -Q -N1 -n1 -w$node2 ls /tmp/$node2/test$test_id\_file
 
 echo -n \"Checking node 0: \"
-$srun -N1 -n1 -w$node1 ls /tmp/$node1/test$test_id\_file
+$srun -Q -N1 -n1 -w$node1 ls /tmp/$node1/test$test_id\_file
 
 $srun $bin_rm -f /tmp/$node1/test$test_id\_file
 $srun $bin_rm -fr /tmp/$node1
@@ -107,6 +107,7 @@
 "
 
 # Make allocations
+set timeout $max_job_delay
 set matches 0
 spawn $salloc -N2 -w$hostlist -t1 ./$file_in1
 expect {
diff -Nru slurm-llnl-16.05.8/testsuite/expect/test1.52 slurm-llnl-16.05.9/testsuite/expect/test1.52
--- slurm-llnl-16.05.8/testsuite/expect/test1.52	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/testsuite/expect/test1.52	2017-01-31 20:56:34.000000000 +0100
@@ -37,7 +37,7 @@
 set exit_code   0
 set num_nodes   2
 set num_tasks   2
-set node_count  0
+set idle_nodes  0
 set max_nodes   0
 set task_count  0
 set hostfile    "test$test_id.hostfile"
@@ -71,10 +71,6 @@
 		set max_nodes 999999
 		exp_continue
 	}
-	-re "TotalNodes=($number)" {
-		set node_count $expect_out(1,string)
-		exp_continue
-	}
 	timeout {
 		send_user "\nFAILURE: scontrol not responding\n"
 		exit 1
@@ -83,8 +79,14 @@
 		wait
 	}
 }
-if { ($node_count < 3) || ($max_nodes < 3) } {
-	send_user "WARNING: system must have at least 3 nodes to run this test on. $node_count $max_nodes\n"
+
+set idle_nodes [available_nodes $def_part idle]
+if { ($idle_nodes < 3) || ($max_nodes < 3) } {
+	if { $max_nodes == 999999 } {
+		send_user "WARNING: partition $def_part must have at least 3 idle nodes and MaxNodes >= 3 to run this test on. IDLE:$idle_nodes MaxNodes:UNLIMITED\n"
+	} else {
+		send_user "WARNING: partition $def_part must have at least 3 idle nodes and MaxNodes >= 3 to run this test on. IDLE:$idle_nodes MaxNodes:$max_nodes\n"
+	}
 	exit $exit_code
 }
 
diff -Nru slurm-llnl-16.05.8/testsuite/expect/test15.22 slurm-llnl-16.05.9/testsuite/expect/test15.22
--- slurm-llnl-16.05.8/testsuite/expect/test15.22	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/testsuite/expect/test15.22	2017-01-31 20:56:34.000000000 +0100
@@ -131,9 +131,15 @@
 #
 # Submit job explicitly to a non-default partition
 #
-set job_id           0
+set job_id          0
+set legit_failure   0
 set salloc_pid [spawn $salloc --partition=$other_part_name -t1 $bin_sleep 1]
 expect {
+	-re "Required node not available" {
+		set legit_failure 1
+		exec $bin_kill -INT $salloc_pid
+		exp_continue
+	}
 	-re "Granted job allocation ($number)" {
 		set job_id $expect_out(1,string)
 		exp_continue
@@ -151,7 +157,9 @@
 	}
 }
 # Confirm the job's partition
-if {$job_id == 0} {
+if {$legit_failure == 1} {
+	send_user "\nWARNING: partition '$other_part_name' is not usable\n"
+} elseif {$job_id == 0} {
 	send_user "\nFAILURE: batch submit failure\n"
 	set exit_code 1
 } else {
diff -Nru slurm-llnl-16.05.8/testsuite/expect/test1.63 slurm-llnl-16.05.9/testsuite/expect/test1.63
--- slurm-llnl-16.05.8/testsuite/expect/test1.63	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/testsuite/expect/test1.63	2017-01-31 20:56:34.000000000 +0100
@@ -72,6 +72,7 @@
 	}
 	-re "Hello World!" {
 		incr match_run
+		sleep 0.1
 		exec $bin_kill -INT $srun_pid
 		exp_continue
 	}
diff -Nru slurm-llnl-16.05.8/testsuite/expect/test17.34 slurm-llnl-16.05.9/testsuite/expect/test17.34
--- slurm-llnl-16.05.8/testsuite/expect/test17.34	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/testsuite/expect/test17.34	2017-01-31 20:56:34.000000000 +0100
@@ -62,8 +62,14 @@
 	} else {
 		set task_limit 1
 	}
+
+	set ntasks [expr abs($task_limit + $task)]
+	if {$ntasks == 0} {
+		set ntasks 1
+	}
+
 	set error_chk 0
-	spawn $sbatch -t1 -w$node -S$core_spec -n[expr abs($task_limit + $task)] -o$file_out $spec_in
+	spawn $sbatch -t1 -w$node -S$core_spec -n$ntasks -o$file_out $spec_in
 	expect {
 		-re "Submitted batch job ($number)" {
 			set job_id $expect_out(1,string)
@@ -156,6 +162,19 @@
 
 print_header $test_id
 
+set select_type [test_select_type]
+if {![string compare $select_type "linear"] || ![string compare $select_type "serial"]} {
+	send_user "\nWARNING: This test is incompatible with select/$select_type\n"
+	exit 0
+} elseif {![string compare $select_type "cray"] && ![test_select_type_params "other_cons_res"]} {
+	send_user "\nWARNING: This test is incompatible with select/linear\n"
+	exit 0
+}
+if {[test_select_type_params "CR_SOCKET"]} {
+	send_user "\nWARNING: This test is incompatible with CR_SOCKET allocations\n"
+	exit 0
+}
+
 log_user 0
 set allow_spec 0
 spawn $scontrol show config
@@ -178,16 +197,6 @@
 	exit $exit_code
 }
 
-set select_type [test_select_type]
-if {![string compare $select_type "linear"]} {
-	send_user "\nWARNING: This test is incompatible with select/$select_type\n"
-	exit 0
-}
-if {[test_select_type_params "CR_SOCKET"]} {
-	send_user "\nWARNING: This test is incompatible with CR_SOCKET allocations\n"
-	exit 0
-}
-
 # Remove any vestigial files
 exec $bin_rm -f $file_in $file_out $spec_in
 
diff -Nru slurm-llnl-16.05.8/testsuite/expect/test17.39 slurm-llnl-16.05.9/testsuite/expect/test17.39
--- slurm-llnl-16.05.8/testsuite/expect/test17.39	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/testsuite/expect/test17.39	2017-01-31 20:56:34.000000000 +0100
@@ -35,8 +35,6 @@
 set slow_id       0
 set fast_id       0
 set dep_id        0
-set slow_job      "test$test_id\_slow_sc"
-set fast_job      "test$test_id\_fast_sc"
 set exit_code     0
 
 print_header $test_id
@@ -56,9 +54,6 @@
 	}
 }
 
-make_bash_script $slow_job "sleep 120"
-make_bash_script $fast_job "sleep 30"
-
 proc check_state {id state} {
 	global squeue exit_code
 
@@ -85,14 +80,8 @@
 	}
 }
 
-if {[test_select_type_params "MEMORY"]} {
-	set job_mem  10
-} else {
-	set job_mem  1
-}
-
 # Submit job 1 of 3
-spawn $sbatch -t3 -o/dev/null --mem=${job_mem} $slow_job
+spawn $sbatch -t3 -o /dev/null --wrap "sleep 120"
 expect {
 	-re "Submitted batch job ($number)" {
 		set slow_id $expect_out(1,string)
@@ -112,7 +101,7 @@
 }
 
 # Submit job 2 of 3
-spawn $sbatch -t3 -o/dev/null --mem=${job_mem} $fast_job
+spawn $sbatch -t3 -o /dev/null --wrap "sleep 30"
 expect {
 	-re "Node count specification invalid" {
 		send_user "\nWARNING: can't test with less than two nodes\n"
@@ -136,7 +125,7 @@
 }
 
 # Submit dependency job, 3 of 3
-spawn $sbatch --dependency=afterok:$slow_id?afterok:$fast_id -o/dev/null --mem=${job_mem} $slow_job
+spawn $sbatch --dependency=afterok:$slow_id?afterok:$fast_id -o /dev/null --wrap "sleep 120"
 expect {
 	-re "Submitted batch job ($number)" {
 		set dep_id $expect_out(1,string)
@@ -197,7 +186,6 @@
 cancel_job $dep_id
 
 if {$exit_code == 0} {
-	exec $bin_rm -f $slow_job $fast_job
 	send_user "\nSUCCESS\n"
 }
 exit $exit_code
diff -Nru slurm-llnl-16.05.8/testsuite/expect/test1.74 slurm-llnl-16.05.9/testsuite/expect/test1.74
--- slurm-llnl-16.05.8/testsuite/expect/test1.74	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/testsuite/expect/test1.74	2017-01-31 20:56:34.000000000 +0100
@@ -65,6 +65,10 @@
 	send_user "\nWARNING: This test is incompatible with serial systems\n"
 	exit $exit_code
 }
+if {[string compare [check_accounting_admin_level] "Administrator"]} {
+	send_user "\nThis test can't be run without being an Accounting administrator.\n"
+	exit $exit_code
+}
 
 spawn $bin_id -u -n
 expect {
diff -Nru slurm-llnl-16.05.8/testsuite/expect/test17.40 slurm-llnl-16.05.9/testsuite/expect/test17.40
--- slurm-llnl-16.05.8/testsuite/expect/test17.40	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/testsuite/expect/test17.40	2017-01-31 20:56:34.000000000 +0100
@@ -156,6 +156,19 @@
 
 print_header $test_id
 
+set select_type [test_select_type]
+if {![string compare $select_type "linear"] || ![string compare $select_type "serial"]} {
+	send_user "\nWARNING: This test is incompatible with select/$select_type\n"
+	exit 0
+} elseif {![string compare $select_type "cray"] && ![test_select_type_params "other_cons_res"]} {
+	send_user "\nWARNING: This test is incompatible with select/linear\n"
+	exit 0
+}
+if {[test_select_type_params "CR_SOCKET"]} {
+	send_user "\nWARNING: This test is incompatible with CR_SOCKET allocations\n"
+	exit 0
+}
+
 log_user 0
 set allow_spec 0
 spawn $scontrol show config
@@ -178,17 +191,6 @@
 	exit $exit_code
 }
 
-set select_type [test_select_type]
-if {![string compare $select_type "linear"] || ![string compare $select_type "serial"]} {
-	send_user "\nWARNING: This test is incompatible with select/$select_type\n"
-	exit 0
-}
-
-if {[test_select_type_params "CR_SOCKET"]} {
-	send_user "\nWARNING: This test is incompatible with CR_SOCKET allocations\n"
-	exit 0
-}
-
 # Remove any vestigial files
 exec $bin_rm -f $file_in $file_out $spec_in
 
diff -Nru slurm-llnl-16.05.8/testsuite/expect/test21.36 slurm-llnl-16.05.9/testsuite/expect/test21.36
--- slurm-llnl-16.05.8/testsuite/expect/test21.36	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/testsuite/expect/test21.36	2017-01-31 20:56:34.000000000 +0100
@@ -81,6 +81,10 @@
 	send_user "\nThis test can't be run without AccountStorageType=slurmdbd\n"
 	exit 0
 }
+if {[string compare [check_accounting_admin_level] "Administrator"]} {
+	send_user "\nThis test can't be run without being an Accounting administrator.\n"
+	exit 0
+}
 
 # Remove pre-existing items
 cleanup
diff -Nru slurm-llnl-16.05.8/testsuite/expect/test2.8 slurm-llnl-16.05.9/testsuite/expect/test2.8
--- slurm-llnl-16.05.8/testsuite/expect/test2.8	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/testsuite/expect/test2.8	2017-01-31 20:56:34.000000000 +0100
@@ -35,7 +35,6 @@
 
 set test_id     "2.8"
 set exit_code   0
-set file_in     "test$test_id.input"
 set is_bluegene 0
 set job_id1     0
 set job_id2     0
@@ -65,26 +64,10 @@
 	set step_id 0
 }
 
-if {[test_select_type_params "MEMORY"]} {
-	set job_mem  20
-	set step_mem 10
-} else {
-	set job_mem  1
-	set step_mem 1
-}
-
-#
-# Build input script file
-#
-make_bash_script $file_in "
-  $srun --mem=${step_mem} $bin_sleep 60 &
-  $srun --mem=${step_mem} $bin_sleep 60
-"
-
 #
 # Submit a couple jobs so we have something to work with
 #
-set sbatch_pid [spawn $sbatch --output=/dev/null --error=/dev/null -t5 --mem=${job_mem} $file_in]
+set sbatch_pid [spawn $sbatch --output=/dev/null --error=/dev/null -t5 --wrap "$srun $bin_sleep 60"]
 expect {
 	-re "Submitted batch job ($number)" {
 		set job_id1 $expect_out(1,string)
@@ -104,7 +87,7 @@
 	exit 1
 }
 
-set sbatch_pid [spawn $sbatch --output=/dev/null --error=/dev/null -t5 --mem=${job_mem} $file_in]
+set sbatch_pid [spawn $sbatch --output=/dev/null --error=/dev/null -t5 --wrap "$srun $bin_sleep 60"]
 expect {
 	-re "Submitted batch job ($number)" {
 		set job_id2 $expect_out(1,string)
@@ -126,8 +109,6 @@
 	exit 1
 }
 
-exec $bin_rm -f $file_in
-
 if {[wait_for_job $job_id1 "RUNNING"] != 0} {
         send_user "\nFAILURE: waiting for job $job_id1 to start\n"
         cancel_job $job_id1
@@ -451,4 +432,3 @@
 	send_user "\nSUCCESS\n"
 }
 exit $exit_code
-
diff -Nru slurm-llnl-16.05.8/testsuite/expect/test28.7 slurm-llnl-16.05.9/testsuite/expect/test28.7
--- slurm-llnl-16.05.8/testsuite/expect/test28.7	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/testsuite/expect/test28.7	2017-01-31 20:56:34.000000000 +0100
@@ -33,7 +33,6 @@
 set test_id       "28.7"
 set exit_code     0
 set array_size    3
-set script        "test$test_id\.bash"
 set top_array_task_id [expr $array_size - 1]
 
 print_header $test_id
@@ -43,20 +42,11 @@
 	exit 0
 }
 
-if {[test_select_type_params "MEMORY"]} {
-	set job_mem  10
-} else {
-	set job_mem  1
-}
-
-exec $bin_rm -f $script
-make_bash_script $script "sleep \$(( ( RANDOM % 10 ) + 1 ))"
-
 #
 # Submit a job array for first dependency test
 #
 set job_id 0
-spawn $sbatch -N1 -t1 -a 0-$top_array_task_id -o /dev/null -e /dev/null --mem=${job_mem} $script
+spawn $sbatch -N1 -t1 -a 0-$top_array_task_id -o /dev/null -e /dev/null --wrap "sleep \$(( ( RANDOM % 10 ) + 1 ))"
 expect {
 	-re "Submitted batch job ($number)" {
 		set job_id $expect_out(1,string)
@@ -80,7 +70,7 @@
 #
 set timeout $max_job_delay
 set match_job_state 0
-set srun_pid [spawn $srun -t1 --dependency=afterany:$job_id\_$top_array_task_id --mem=${job_mem} $scontrol show job $job_id\_$top_array_task_id]
+set srun_pid [spawn $srun -t1 --dependency=afterany:$job_id\_$top_array_task_id $scontrol show job $job_id\_$top_array_task_id]
 expect {
 	-re "JobState=COMPLETED|COMPLETING" {
 		set match_job_state 1
@@ -108,7 +98,7 @@
 # Submit a job array for second dependency test
 #
 set job_id 0
-spawn $sbatch -N1 -t1 -a 0-[expr $array_size - 1] -o /dev/null -e /dev/null --mem=${job_mem} $script
+spawn $sbatch -N1 -t1 -a 0-[expr $array_size - 1] -o /dev/null -e /dev/null --wrap "sleep \$(( ( RANDOM % 10 ) + 1 ))"
 expect {
 	-re "Submitted batch job ($number)" {
 		set job_id $expect_out(1,string)
@@ -132,7 +122,7 @@
 #
 set timeout $max_job_delay
 set match_job_state 0
-set srun_pid [spawn $srun -t1 --dependency=afterany:$job_id --mem=${job_mem} $scontrol show job $job_id]
+set srun_pid [spawn $srun -t1 --dependency=afterany:$job_id $scontrol show job $job_id]
 expect {
 	-re "JobState=COMPLETED|COMPLETING" {
 		incr match_job_state
@@ -154,7 +144,6 @@
 
 cancel_job $job_id
 if {$exit_code == 0} {
-	exec $bin_rm -f $script
 	send_user "\nSUCCESS\n"
 }
 exit $exit_code
diff -Nru slurm-llnl-16.05.8/testsuite/expect/test3.15 slurm-llnl-16.05.9/testsuite/expect/test3.15
--- slurm-llnl-16.05.8/testsuite/expect/test3.15	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/testsuite/expect/test3.15	2017-01-31 20:56:34.000000000 +0100
@@ -32,7 +32,6 @@
 
 set test_id		"3.15"
 set exit_code		0
-set script_name         "test$test_id.bash"
 set license_name        "test$test_id"
 set resv_name           "resv$test_id"
 set user_name		""
@@ -57,10 +56,9 @@
 }
 
 proc submit_job { license_count } {
-	global script_name bin_sleep license_name sbatch number exit_code job_mem
+	global bin_sleep license_name sbatch number exit_code
 	set job_id 0
-	make_bash_script $script_name "$bin_sleep 300"
-	spawn $sbatch -n1 -t1 -o /dev/null -L $license_name:$license_count --mem=${job_mem} $script_name
+	spawn $sbatch -n1 -t1 -o /dev/null -L $license_name:$license_count --wrap "$bin_sleep 300"
 	expect {
 		-re "Submitted batch job ($number)" {
 			set job_id $expect_out(1,string)
@@ -187,12 +185,6 @@
 	exit $exit_code
 }
 
-if {[test_select_type_params "MEMORY"]} {
-	set job_mem  10
-} else {
-	set job_mem  1
-}
-
 spawn $bin_id -un
 expect {
 	-re "($alpha_numeric_under)" {
@@ -378,7 +370,7 @@
 reconfigure
 
 if {$exit_code == 0} {
-	exec $bin_rm -f $cwd/slurm.conf.orig $script_name
+	exec $bin_rm -f $cwd/slurm.conf.orig
 	send_user "\nSUCCESS\n"
 } else {
 	send_user "\nFAILURE\n"
diff -Nru slurm-llnl-16.05.8/testsuite/expect/test35.2 slurm-llnl-16.05.9/testsuite/expect/test35.2
--- slurm-llnl-16.05.8/testsuite/expect/test35.2	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/testsuite/expect/test35.2	2017-01-31 20:56:34.000000000 +0100
@@ -42,6 +42,28 @@
 set script_use  "test$test_id.use.bash"
 set tmp_file    "test$test_id"
 
+#
+# get my uid and clear any vestigial triggers
+#
+set uid -1
+spawn $bin_id -u
+expect {
+	-re "($number)" {
+		set uid $expect_out(1,string)
+		exp_continue
+	}
+	eof {
+		wait
+	}
+}
+if {$uid == -1} {
+	send_user "\nCan't get my uid\n"
+	exit 1
+} elseif {$uid == 0} {
+	send_user "\nWARNING: Can't run this test as user root\n"
+	exit 0
+}
+
 proc find_bb_jobid { fname bb_jobid } {
 	global bin_cat
 
@@ -142,6 +164,9 @@
 	set exit_code 1
 }
 
+# Wait for purge of buffer to complete
+sleep 10
+
 set found 0
 spawn $scontrol show burst
 expect {
diff -Nru slurm-llnl-16.05.8/testsuite/expect/test5.9 slurm-llnl-16.05.9/testsuite/expect/test5.9
--- slurm-llnl-16.05.8/testsuite/expect/test5.9	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/testsuite/expect/test5.9	2017-01-31 20:56:34.000000000 +0100
@@ -98,6 +98,10 @@
 	send_user "\nWARNING: This test is incompatible with serial systems\n"
 	exit $exit_code
 }
+if {[string compare [check_accounting_admin_level] "Administrator"]} {
+	send_user "\nThis test can't be run without being an Accounting administrator.\n"
+	exit $exit_code
+}
 set available [available_nodes $partition idle]
 if {$available < 2} {
     send_user "\nWARNING: not enough nodes currently available ($available avail, 2 needed)\n"
diff -Nru slurm-llnl-16.05.8/testsuite/expect/test7.11 slurm-llnl-16.05.9/testsuite/expect/test7.11
--- slurm-llnl-16.05.8/testsuite/expect/test7.11	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/testsuite/expect/test7.11	2017-01-31 20:56:34.000000000 +0100
@@ -69,10 +69,12 @@
 #
 log_user 0
 set config_dir ""
+set ctld_slurm_ver ""
 spawn $scontrol show config
 expect {
-	-re "SLURM_CONF.*= (/.*)/slurm.conf.*SLURM_VERSION" {
+	-re "SLURM_CONF.*= (.*)/slurm.conf.*SLURM_VERSION *= ($float)" {
 		set config_dir $expect_out(1,string)
+		set ctld_slurm_ver $expect_out(2,string)
 		exp_continue
 	}
 	eof {
@@ -84,6 +86,27 @@
 	send_user "\nFAILURE: Could not locate slurm.conf directory\n"
 	exit 1
 }
+
+log_user 0
+set loc_slurm_ver ""
+spawn $scontrol -V
+expect {
+	-re "slurm ($float)" {
+		set loc_slurm_ver $expect_out(1,string)
+		exp_continue
+	}
+	eof {
+		wait
+	}
+}
+log_user 1
+
+if {[string compare $ctld_slurm_ver $loc_slurm_ver]} {
+	send_user "\nWARNING: slurmctld ($ctld_slurm_ver) and local Slurm ($loc_slurm_ver) versions are not the same, can not continue.\n"
+	exit 0
+}
+
+
 set spank_conf_file ${config_dir}/plugstack.conf
 exec $bin_rm -f $orig_spank_conf $new_spank_conf $file_out $spank_out
 if {[file exists $spank_conf_file]} {
@@ -120,10 +143,6 @@
 	}
 }
 
-# Allow enough time for configuration file in NFS to be propagated
-# to all nodes of cluster
-exec sleep 60
-
 #
 # Test of srun help message
 #
diff -Nru slurm-llnl-16.05.8/testsuite/expect/test7.13 slurm-llnl-16.05.9/testsuite/expect/test7.13
--- slurm-llnl-16.05.8/testsuite/expect/test7.13	2017-01-04 22:11:51.000000000 +0100
+++ slurm-llnl-16.05.9/testsuite/expect/test7.13	2017-01-31 20:56:34.000000000 +0100
@@ -166,7 +166,7 @@
 		}
 	}
 	if {$matches != 4} {
-		send_user "\nFAILURE: sacct of $job_id failed ($matches != 5)\n"
+		send_user "\nFAILURE: sacct of $job_id failed ($matches != 4)\n"
 		exit 1
 	}
 }

--- End Message ---
--- Begin Message ---
On Thu, 27 Apr 2017 00:43:29 +0200 Mehdi Dogguy <mehdi@debian.org> wrote:
> Package: release.debian.org
> Severity: normal
> User: release.debian.org@packages.debian.org
> Usertags: unblock
> 
> [...]
> 
> Can you please consider unblocking slurm-llnl?
> 
> [...]

Unblocked, thanks.

~Niels

--- End Message ---
