Bug#990570: unblock: slurm-wlm/20.11.7-1
Hi Paul,
thank you for reviewing my request.
On Fri, Jul 02, 2021 at 09:41:34PM +0200, Paul Gevers wrote:
> 510 files changed, 6706 insertions(+), 5099 deletions(-)
>
> Your diff was so large, it didn't even reach the list.
I'm sorry I didn't review the debdiff before sending it.
> > [ Reason ]
> >
> > 20.11.6 and 20.11.7 were bugfix and stability releases with no feature changes.
> > Please allow this into bullseye.
>
> Can you please provide a filter diff that's reviewable to confirm this
> statement?
I'm attaching two diffs: one with the relevant upstream changes and
one with the changes in the Debian directory. I removed all the
changes to the documentation and the test suite, and all the changes due to a
switch in the automake version that resulted in a one-line comment change in
all the Makefile.in files.
> > [ Checklist ]
> > [X] all changes are documented in the d/changelog and in NEWS file
> > [X] I reviewed all changes and I approve them
>
> All 6000 lines?
I did a quick review of the relevant changes contained in the attached
patches.
Best regards,
--
Gennaro Oliva
diff --git a/auxdir/x_ac_systemd.m4 b/auxdir/x_ac_systemd.m4
index e46525c2..76ca05e0 100644
--- a/auxdir/x_ac_systemd.m4
+++ b/auxdir/x_ac_systemd.m4
@@ -7,8 +7,7 @@
#
# DESCRIPTION:
# Determine systemd presence
-# Determine systemd version
-# Determine systemd system unit dir
+# Substitute SYSTEMD_TASKSMAX_OPTION output var if systemd version >= 227
##*****************************************************************************
AC_DEFUN([X_AC_SYSTEMD],
@@ -25,10 +24,9 @@ AC_DEFUN([X_AC_SYSTEMD],
[1],
[Define systemd presence])
- _cv_systemd_version=`$PKG_CONFIG --modversion systemd 2>/dev/null`
-
SYSTEMD_TASKSMAX_OPTION=""
- if [test "$_cv_systemd_version" -ge 227]; then
+ $PKG_CONFIG --atleast-version 227 systemd
+ if [test "$?" -eq 0]; then
SYSTEMD_TASKSMAX_OPTION="TasksMax=infinity"
fi
AC_SUBST(SYSTEMD_TASKSMAX_OPTION)
diff --git a/contribs/perlapi/common/msg.h b/contribs/perlapi/common/msg.h
index a5f5b179..4df8eed2 100644
--- a/contribs/perlapi/common/msg.h
+++ b/contribs/perlapi/common/msg.h
@@ -370,6 +370,7 @@ static inline void * SV2ptr(SV *sv)
inline static int step_id_to_hv(slurm_step_id_t *step_id, HV *hv)
{
STORE_FIELD(hv, step_id, job_id, uint32_t);
+ STORE_FIELD(hv, step_id, step_het_comp, uint32_t);
STORE_FIELD(hv, step_id, step_id, uint32_t);
return 0;
@@ -378,6 +379,7 @@ inline static int step_id_to_hv(slurm_step_id_t *step_id, HV *hv)
inline static int hv_to_step_id(slurm_step_id_t *step_id, HV *hv)
{
FETCH_FIELD(hv, step_id, job_id, uint32_t, TRUE);
+ FETCH_FIELD(hv, step_id, step_het_comp, uint32_t, TRUE);
FETCH_FIELD(hv, step_id, step_id, uint32_t, TRUE);
return 0;
diff --git a/contribs/perlapi/libslurm/perl/step.c b/contribs/perlapi/libslurm/perl/step.c
index c64d66c8..8b6d159d 100644
--- a/contribs/perlapi/libslurm/perl/step.c
+++ b/contribs/perlapi/libslurm/perl/step.c
@@ -66,8 +66,14 @@ hv_to_job_step_info(HV *hv, job_step_info_t *step_info)
SV **svp;
AV *av;
int i, n;
+ HV *step_id_hv;
- HV *step_id_hv = (HV*)sv_2mortal((SV*)newHV());
+ svp = hv_fetch(hv, "step_id", 7, FALSE);
+ if (svp && SvROK(*svp) && SvTYPE(SvRV(*svp)) == SVt_PVHV) {
+ step_id_hv = (HV*)SvRV(*svp);
+ } else {
+ step_id_hv = (HV*)sv_2mortal((SV*)newHV());
+ }
memset(step_info, 0, sizeof(job_step_info_t));
diff --git a/contribs/perlapi/libslurm/perl/step_ctx.c b/contribs/perlapi/libslurm/perl/step_ctx.c
index edc82667..04106341 100644
--- a/contribs/perlapi/libslurm/perl/step_ctx.c
+++ b/contribs/perlapi/libslurm/perl/step_ctx.c
@@ -16,7 +16,13 @@
int
hv_to_slurm_step_ctx_params(HV *hv, slurm_step_ctx_params_t *params)
{
- HV *step_id_hv = (HV*)sv_2mortal((SV*)newHV());
+ HV *step_id_hv;
+ SV **svp = hv_fetch(hv, "step_id", 7, FALSE);
+ if (svp && SvROK(*svp) && SvTYPE(SvRV(*svp)) == SVt_PVHV) {
+ step_id_hv = (HV*)SvRV(*svp);
+ } else {
+ step_id_hv = (HV*)sv_2mortal((SV*)newHV());
+ }
slurm_step_ctx_params_t_init(params);
diff --git a/contribs/perlapi/libslurm/perl/t/07-spawn.t b/contribs/perlapi/libslurm/perl/t/07-spawn.t
index c50ba024..c77cbe34 100755
--- a/contribs/perlapi/libslurm/perl/t/07-spawn.t
+++ b/contribs/perlapi/libslurm/perl/t/07-spawn.t
@@ -41,7 +41,7 @@ ok($jobid, "allocate resources blocking") or diag("allocate_resources_blocking:
$params = {
- job_id => $jobid,
+ step_id => {job_id => $jobid, step_id => NO_VAL, step_het_comp => NO_VAL},
name => "perlapi_test",
min_nodes => 1,
task_count => 1,
diff --git a/contribs/torque/qstat.pl b/contribs/torque/qstat.pl
index e11efe0b..1016146c 100755
--- a/contribs/torque/qstat.pl
+++ b/contribs/torque/qstat.pl
@@ -499,7 +499,7 @@ sub print_job_select
}
printf(" %s\n", $execHost);
} else {
- printf("\n", $execHost);
+ printf("\n");
}
}
diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in
index 7dd6a958..48bbd8a2 100644
--- a/slurm/slurm.h.in
+++ b/slurm/slurm.h.in
@@ -4807,6 +4807,10 @@ void slurm_init_trigger_msg(trigger_info_t *trigger_info_msg);
#define BB_FLAG_SET_EXEC_HOST 0x0020 /* Set execute host */
#define BB_SIZE_IN_NODES 0x8000000000000000
+/*
+ * Burst buffer states: Keep in sync with bb_state_string() and bb_state_num()
+ * in slurm_protocol_defs.c.
+ */
#define BB_STATE_PENDING 0x0000 /* Placeholder: no action started */
#define BB_STATE_ALLOCATING 0x0001 /* Cray: bbs_setup started */
#define BB_STATE_ALLOCATED 0x0002 /* Cray: bbs_setup started */
diff --git a/src/common/assoc_mgr.c b/src/common/assoc_mgr.c
index 3e1a7fd5..2d5fe1c9 100644
--- a/src/common/assoc_mgr.c
+++ b/src/common/assoc_mgr.c
@@ -186,6 +186,14 @@ static slurmdb_assoc_rec_t *_find_assoc_rec_id(uint32_t assoc_id)
return NULL;
}
+static int _find_acct_by_name(void *x, void *y)
+{
+ slurmdb_coord_rec_t *acct = (slurmdb_coord_rec_t*) x;
+ if (!xstrcmp(acct->name, (char*)y))
+ return 1;
+ return 0;
+}
+
/*
* _find_assoc_rec - return a pointer to the assoc_ptr with the given
* contents of assoc.
@@ -2605,9 +2613,15 @@ extern int assoc_mgr_fill_in_user(void *db_conn, slurmdb_user_rec_t *user,
if (user_pptr)
*user_pptr = NULL;
- if (!assoc_mgr_user_list)
- if (_get_assoc_mgr_user_list(db_conn, enforce) == SLURM_ERROR)
+
+ if (!locked) {
+ if (!assoc_mgr_user_list &&
+ _get_assoc_mgr_user_list(db_conn, enforce) == SLURM_ERROR)
return SLURM_ERROR;
+ } else {
+ if (enforce & ACCOUNTING_ENFORCE_ASSOCS)
+ xassert(assoc_mgr_user_list);
+ }
if (!locked)
assoc_mgr_lock(&locks);
@@ -3061,6 +3075,23 @@ extern bool assoc_mgr_is_user_acct_coord(void *db_conn,
return false;
}
+extern bool assoc_mgr_is_user_acct_coord_user_rec(void *db_conn,
+ slurmdb_user_rec_t *user,
+ char *acct_name)
+{
+ if (!acct_name)
+ return false;
+
+ if (!user || !user->coord_accts)
+ return false;
+
+ if (list_find_first(user->coord_accts, _find_acct_by_name,
+ acct_name))
+ return true;
+
+ return false;
+}
+
extern void assoc_mgr_get_shares(void *db_conn,
uid_t uid, shares_request_msg_t *req_msg,
shares_response_msg_t *resp_msg)
diff --git a/src/common/assoc_mgr.h b/src/common/assoc_mgr.h
index 3e387ca5..768d7d05 100644
--- a/src/common/assoc_mgr.h
+++ b/src/common/assoc_mgr.h
@@ -278,6 +278,16 @@ extern slurmdb_admin_level_t assoc_mgr_get_admin_level(void *db_conn,
extern bool assoc_mgr_is_user_acct_coord(void *db_conn, uint32_t uid,
char *acct);
+/*
+ * see if user is coordinator of given acct
+ * IN: user - slurmdb_user_rec_t of user to check.
+ * IN: acct - name of account
+ * RET: true or false
+ */
+extern bool assoc_mgr_is_user_acct_coord_user_rec(void *db_conn,
+ slurmdb_user_rec_t *user,
+ char *acct_name);
+
/*
* get the share information from the association list
* IN: uid: uid_t of user issuing the request
diff --git a/src/common/fd.c b/src/common/fd.c
index dfb60aa2..cc9a0211 100644
--- a/src/common/fd.c
+++ b/src/common/fd.c
@@ -135,11 +135,11 @@ pid_t fd_is_read_lock_blocked(int fd)
int fd_get_socket_error(int fd, int *err)
{
- socklen_t errlen = sizeof(err);
+ socklen_t errlen = sizeof(*err);
xassert(fd >= 0);
- if (getsockopt(fd, SOL_SOCKET, SO_ERROR, (void *)&err, &errlen))
+ if (getsockopt(fd, SOL_SOCKET, SO_ERROR, (void *)err, &errlen))
return errno;
else
return SLURM_SUCCESS;
diff --git a/src/common/fetch_config.c b/src/common/fetch_config.c
index 06c2528b..46b21a91 100644
--- a/src/common/fetch_config.c
+++ b/src/common/fetch_config.c
@@ -61,6 +61,15 @@ static config_response_msg_t *_fetch_parent(pid_t pid)
int status;
safe_read(to_parent[0], &len, sizeof(int));
+
+ /*
+ * A zero across the pipe indicates the child failed to fetch the
+ * config file for some reason. The child will have already printed
+ * some error messages about this, so just return.
+ */
+ if (len <= 0)
+ return NULL;
+
buffer = init_buf(len);
safe_read(to_parent[0], buffer->head, len);
@@ -86,7 +95,7 @@ static void _fetch_child(List controllers, uint32_t flags)
{
config_response_msg_t *config;
buf_t *buffer = init_buf(1024 * 1024);
- int len;
+ int len = 0;
/*
* Parent process was holding this, but we need to drop it before
@@ -102,6 +111,7 @@ static void _fetch_child(List controllers, uint32_t flags)
if (!config) {
error("%s: failed to fetch remote configs", __func__);
+ safe_write(to_parent[1], &len, sizeof(int));
_exit(1);
}
diff --git a/src/common/gres.c b/src/common/gres.c
index d03c9b3b..6853f086 100644
--- a/src/common/gres.c
+++ b/src/common/gres.c
@@ -8470,6 +8470,7 @@ extern void gres_plugin_job_core_filter3(gres_mc_data_t *mc_ptr,
req_cores = *max_tasks_this_node;
if (mc_ptr->cpus_per_task) {
int threads_per_core, removed_tasks = 0;
+ int efctv_cpt = mc_ptr->cpus_per_task;
if (mc_ptr->threads_per_core)
threads_per_core =
@@ -8478,7 +8479,14 @@ extern void gres_plugin_job_core_filter3(gres_mc_data_t *mc_ptr,
else
threads_per_core = cpus_per_core;
- req_cores *= mc_ptr->cpus_per_task;
+ if ((mc_ptr->ntasks_per_core == 1) &&
+ (efctv_cpt % threads_per_core)) {
+ efctv_cpt /= threads_per_core;
+ efctv_cpt++;
+ efctv_cpt *= threads_per_core;
+ }
+
+ req_cores *= efctv_cpt;
while (*max_tasks_this_node >= *min_tasks_this_node) {
/* round up by full threads per core */
@@ -8500,7 +8508,7 @@ extern void gres_plugin_job_core_filter3(gres_mc_data_t *mc_ptr,
removed_tasks++;
(*max_tasks_this_node)--;
req_cores = *max_tasks_this_node;
- req_cores *= mc_ptr->cpus_per_task;
+ req_cores *= efctv_cpt;
}
}
if (cpus_per_gres) {
@@ -11830,7 +11838,8 @@ extern void gres_plugin_job_set_env(char ***job_env_ptr, List job_gres_list,
*/
extern void gres_plugin_job_set_defs(List job_gres_list, char *gres_name,
uint64_t cpu_per_gpu, uint64_t mem_per_gpu,
- char **cpus_per_tres, char **mem_per_tres)
+ char **cpus_per_tres, char **mem_per_tres,
+ uint16_t *cpus_per_task)
{
uint32_t plugin_id;
ListIterator gres_iter;
@@ -11869,6 +11878,11 @@ extern void gres_plugin_job_set_defs(List job_gres_list, char *gres_name,
xstrfmtcat(*mem_per_tres, "gpu:%"PRIu64,
mem_per_gpu);
}
+ if (cpu_per_gpu && job_gres_data->gres_per_task) {
+ *cpus_per_task = MAX(*cpus_per_task,
+ (job_gres_data->gres_per_task *
+ cpu_per_gpu));
+ }
}
list_iterator_destroy(gres_iter);
}
diff --git a/src/common/gres.h b/src/common/gres.h
index 5b2e4b69..c67af955 100644
--- a/src/common/gres.h
+++ b/src/common/gres.h
@@ -1123,7 +1123,8 @@ extern void gres_plugin_job_set_env(char ***job_env_ptr, List job_gres_list,
*/
extern void gres_plugin_job_set_defs(List job_gres_list, char *gres_name,
uint64_t cpu_per_gpu, uint64_t mem_per_gpu,
- char **cpus_per_tres, char **mem_per_tres);
+ char **cpus_per_tres, char **mem_per_tres,
+ uint16_t *cpus_per_task);
/*
* Extract from the job record's gres_list the count of allocated resources of
diff --git a/src/common/node_conf.c b/src/common/node_conf.c
index 31d5e16a..79bea2e2 100644
--- a/src/common/node_conf.c
+++ b/src/common/node_conf.c
@@ -464,7 +464,7 @@ extern int check_nodeline_info(slurm_conf_node_t *node_ptr,
char *port_str = NULL;
int state_val = NODE_STATE_UNKNOWN;
int address_count, alias_count, bcast_count, hostname_count, port_count;
- uint16_t port = 0;
+ uint16_t port = slurm_conf.slurmd_port;
if ((node_ptr->nodenames == NULL) || (node_ptr->nodenames[0] == '\0'))
return -1;
@@ -571,8 +571,7 @@ extern int check_nodeline_info(slurm_conf_node_t *node_ptr,
node_ptr->port_str);
}
port = port_int;
- } else
- port = slurm_conf.slurmd_port;
+ }
(*_callback)(alias, hostname, address, bcast_address,
port, state_val, node_ptr, config_ptr);
diff --git a/src/common/node_features.c b/src/common/node_features.c
index 5bec01b4..4ffbe501 100644
--- a/src/common/node_features.c
+++ b/src/common/node_features.c
@@ -217,7 +217,7 @@ extern void node_features_g_step_config(bool mem_sort, bitstr_t *numa_bitmap)
for (i = 0; i < g_context_cnt; i++)
(*(ops[i].step_config))(mem_sort, numa_bitmap);
slurm_mutex_unlock(&g_context_lock);
- END_TIMER2("node_features_g_step_config");
+ END_TIMER2(__func__);
}
/* Reset plugin configuration information */
@@ -232,7 +232,7 @@ extern int node_features_g_reconfig(void)
for (i = 0; ((i < g_context_cnt) && (rc == SLURM_SUCCESS)); i++)
rc = (*(ops[i].reconfig))();
slurm_mutex_unlock(&g_context_lock);
- END_TIMER2("node_features_g_reconfig");
+ END_TIMER2(__func__);
return rc;
}
@@ -250,7 +250,7 @@ extern bool node_features_g_changeable_feature(char *feature)
for (i = 0; ((i < g_context_cnt) && !changeable); i++)
changeable = (*(ops[i].changeable_feature))(feature);
slurm_mutex_unlock(&g_context_lock);
- END_TIMER2("node_features_g_reconfig");
+ END_TIMER2(__func__);
return changeable;
}
@@ -268,7 +268,7 @@ extern int node_features_g_get_node(char *node_list)
for (i = 0; ((i < g_context_cnt) && (rc == SLURM_SUCCESS)); i++)
rc = (*(ops[i].get_node))(node_list);
slurm_mutex_unlock(&g_context_lock);
- END_TIMER2("node_features_g_get_node");
+ END_TIMER2(__func__);
return rc;
}
@@ -285,7 +285,7 @@ extern int node_features_g_job_valid(char *job_features)
for (i = 0; ((i < g_context_cnt) && (rc == SLURM_SUCCESS)); i++)
rc = (*(ops[i].job_valid))(job_features);
slurm_mutex_unlock(&g_context_lock);
- END_TIMER2("node_features_g_job_valid");
+ END_TIMER2(__func__);
return rc;
}
@@ -317,7 +317,7 @@ extern char *node_features_g_job_xlate(char *job_features)
}
}
slurm_mutex_unlock(&g_context_lock);
- END_TIMER2("node_features_g_job_xlate");
+ END_TIMER2(__func__);
return node_features;
}
@@ -338,7 +338,7 @@ extern bitstr_t *node_features_g_get_node_bitmap(void)
break;
}
slurm_mutex_unlock(&g_context_lock);
- END_TIMER2("node_features_g_get_node_bitmap");
+ END_TIMER2(__func__);
return node_bitmap;
}
@@ -356,7 +356,7 @@ extern int node_features_g_overlap(bitstr_t *active_bitmap)
for (i = 0; i < g_context_cnt; i++)
cnt += (*(ops[i].overlap))(active_bitmap);
slurm_mutex_unlock(&g_context_lock);
- END_TIMER2("node_features_g_overlap");
+ END_TIMER2(__func__);
return cnt;
}
@@ -377,7 +377,7 @@ extern bool node_features_g_node_power(void)
break;
}
slurm_mutex_unlock(&g_context_lock);
- END_TIMER2("node_features_g_node_power");
+ END_TIMER2(__func__);
return node_power;
}
@@ -398,7 +398,7 @@ extern int node_features_g_node_set(char *active_features)
rc = (*(ops[i].node_set))(active_features);
}
slurm_mutex_unlock(&g_context_lock);
- END_TIMER2("node_features_g_node_set");
+ END_TIMER2(__func__);
return rc;
}
@@ -418,7 +418,7 @@ extern void node_features_g_node_state(char **avail_modes, char **current_mode)
(*(ops[i].node_state))(avail_modes, current_mode);
}
slurm_mutex_unlock(&g_context_lock);
- END_TIMER2("node_features_g_node_state");
+ END_TIMER2(__func__);
}
/* Note the active features associated with a set of nodes have been updated.
@@ -439,7 +439,7 @@ extern int node_features_g_node_update(char *active_features,
rc = (*(ops[i].node_update))(active_features, node_bitmap);
}
slurm_mutex_unlock(&g_context_lock);
- END_TIMER2("node_features_g_node_update");
+ END_TIMER2(__func__);
return rc;
}
@@ -469,7 +469,7 @@ extern bool node_features_g_node_update_valid(void *node_ptr,
break;
}
slurm_mutex_unlock(&g_context_lock);
- END_TIMER2("node_features_g_node_update_valid");
+ END_TIMER2(__func__);
return update_valid;
}
@@ -511,7 +511,7 @@ extern char *node_features_g_node_xlate(char *new_features, char *orig_features,
}
slurm_mutex_unlock(&g_context_lock);
- END_TIMER2("node_features_g_node_xlate");
+ END_TIMER2(__func__);
return new_value;
}
@@ -541,7 +541,7 @@ extern char *node_features_g_node_xlate2(char *new_features)
}
slurm_mutex_unlock(&g_context_lock);
- END_TIMER2("node_features_g_node_xlate2");
+ END_TIMER2(__func__);
return new_value;
}
@@ -561,7 +561,7 @@ extern bool node_features_g_user_update(uid_t uid)
result = (*(ops[i].user_update))(uid);
}
slurm_mutex_unlock(&g_context_lock);
- END_TIMER2("node_features_g_user_update");
+ END_TIMER2(__func__);
return result;
}
@@ -580,7 +580,7 @@ extern uint32_t node_features_g_boot_time(void)
boot_time = MAX(boot_time, (*(ops[i].boot_time))());
}
slurm_mutex_unlock(&g_context_lock);
- END_TIMER2("node_features_g_user_update");
+ END_TIMER2(__func__);
return boot_time;
}
@@ -612,8 +612,7 @@ extern List node_features_g_get_config(void)
list_append(conf_list, p);
}
slurm_mutex_unlock(&g_context_lock);
-
- END_TIMER2("node_features_g_get_config");
+ END_TIMER2(__func__);
return conf_list;
}
@@ -632,7 +631,7 @@ extern uint32_t node_features_g_reboot_weight(void)
if (g_context_cnt > 0)
weight = (*(ops[0].reboot_weight))();
slurm_mutex_unlock(&g_context_lock);
- END_TIMER2("node_features_g_reboot_weight");
+ END_TIMER2(__func__);
return weight;
}
diff --git a/src/common/slurm_cred.c b/src/common/slurm_cred.c
index 4466f4da..31ef7d8c 100644
--- a/src/common/slurm_cred.c
+++ b/src/common/slurm_cred.c
@@ -379,8 +379,8 @@ static int _fill_cred_gids(slurm_cred_t *cred, slurm_cred_arg_t *arg)
rc = slurm_getpwuid_r(arg->uid, &pwd, buffer, PW_BUF_SIZE, &result);
if (rc || !result) {
- error("%s: getpwuid failed for uid=%u",
- __func__, arg->uid);
+ error("%s: getpwuid failed for uid=%u: %s",
+ __func__, arg->uid, slurm_strerror(rc));
return SLURM_ERROR;
}
diff --git a/src/common/slurm_persist_conn.c b/src/common/slurm_persist_conn.c
index 3733d656..afb59cb4 100644
--- a/src/common/slurm_persist_conn.c
+++ b/src/common/slurm_persist_conn.c
@@ -169,14 +169,14 @@ static bool _conn_readable(slurm_persist_conn_t *persist_conn)
return false;
}
if (ufds.revents & POLLERR) {
- int sockerr;
- if (fd_get_socket_error(ufds.fd, &sockerr))
+ int sockerr, fd_rc;
+ if (!(fd_rc = fd_get_socket_error(ufds.fd, &sockerr)))
error("%s: persistent connection for fd %d experienced error[%d]: %s",
__func__, ufds.fd, sockerr,
slurm_strerror(sockerr));
else
- error("%s: persistent connection for fd %d experienced an unknown error",
- __func__, ufds.fd);
+ error("%s: persistent connection for fd %d experienced an error getting socket error: %s",
+ __func__, ufds.fd, slurm_strerror(fd_rc));
return false;
}
diff --git a/src/common/slurm_protocol_defs.c b/src/common/slurm_protocol_defs.c
index f5f21ca7..d32163e4 100644
--- a/src/common/slurm_protocol_defs.c
+++ b/src/common/slurm_protocol_defs.c
@@ -3078,6 +3078,8 @@ extern uint32_t job_state_num(const char *state_name)
return JOB_CONFIGURING;
if (_job_name_test(JOB_RESIZING, state_name))
return JOB_RESIZING;
+ if (_job_name_test(JOB_RESV_DEL_HOLD, state_name))
+ return JOB_RESV_DEL_HOLD;
if (_job_name_test(JOB_REQUEUE_CRON, state_name))
return JOB_REQUEUE_CRON;
if (_job_name_test(JOB_REQUEUE, state_name))
@@ -3430,14 +3432,22 @@ extern char *bb_state_string(uint16_t state)
return "allocating";
if (state == BB_STATE_ALLOCATED)
return "allocated";
+ if (state == BB_STATE_DELETING)
+ return "deleting";
+ if (state == BB_STATE_DELETED)
+ return "deleted";
if (state == BB_STATE_STAGING_IN)
return "staging-in";
if (state == BB_STATE_STAGED_IN)
return "staged-in";
+ if (state == BB_STATE_PRE_RUN)
+ return "pre-run";
if (state == BB_STATE_RUNNING)
return "running";
if (state == BB_STATE_SUSPEND)
return "suspended";
+ if (state == BB_STATE_POST_RUN)
+ return "post-run";
if (state == BB_STATE_STAGING_OUT)
return "staging-out";
if (state == BB_STATE_STAGED_OUT)
@@ -3461,14 +3471,22 @@ extern uint16_t bb_state_num(char *tok)
return BB_STATE_ALLOCATING;
if (!xstrcasecmp(tok, "allocated"))
return BB_STATE_ALLOCATED;
+ if (!xstrcasecmp(tok, "deleting"))
+ return BB_STATE_DELETING;
+ if (!xstrcasecmp(tok, "deleted"))
+ return BB_STATE_DELETED;
if (!xstrcasecmp(tok, "staging-in"))
return BB_STATE_STAGING_IN;
if (!xstrcasecmp(tok, "staged-in"))
return BB_STATE_STAGED_IN;
+ if (!xstrcasecmp(tok, "pre-run"))
+ return BB_STATE_PRE_RUN;
if (!xstrcasecmp(tok, "running"))
return BB_STATE_RUNNING;
if (!xstrcasecmp(tok, "suspend"))
return BB_STATE_SUSPEND;
+ if (!xstrcasecmp(tok, "post-run"))
+ return BB_STATE_POST_RUN;
if (!xstrcasecmp(tok, "staging-out"))
return BB_STATE_STAGING_OUT;
if (!xstrcasecmp(tok, "staged-out"))
diff --git a/src/plugins/accounting_storage/mysql/as_mysql_assoc.c b/src/plugins/accounting_storage/mysql/as_mysql_assoc.c
index 67af9480..5f8f0fec 100644
--- a/src/plugins/accounting_storage/mysql/as_mysql_assoc.c
+++ b/src/plugins/accounting_storage/mysql/as_mysql_assoc.c
@@ -1631,6 +1631,7 @@ static int _process_modify_assoc_results(mysql_conn_t *mysql_conn,
"replace(replace("
"qos, ',%s,', ','), "
"',,', ','))"
+ ", qos=if (qos=',', '', qos)"
", delta_qos=if (qos='', "
"replace(concat(replace("
"replace("
@@ -2564,7 +2565,10 @@ extern int as_mysql_add_assocs(mysql_conn_t *mysql_conn, uint32_t uid,
if (object->is_def != 1)
object->is_def = 0;
- list_append(local_cluster_list, object->cluster);
+ if (!list_find_first(local_cluster_list,
+ slurm_find_char_in_list,
+ object->cluster))
+ list_append(local_cluster_list, object->cluster);
if (object->parent_acct) {
parent = object->parent_acct;
@@ -2605,7 +2609,10 @@ extern int as_mysql_add_assocs(mysql_conn_t *mysql_conn, uint32_t uid,
xstrfmtcat(extra, ", `partition`='%s'", part);
if (!added_user_list)
added_user_list = list_create(NULL);
- list_append(added_user_list, object->user);
+ if (!list_find_first(added_user_list,
+ slurm_find_char_in_list,
+ object->user))
+ list_append(added_user_list, object->user);
}
if (object->id) {
diff --git a/src/plugins/accounting_storage/mysql/as_mysql_wckey.c b/src/plugins/accounting_storage/mysql/as_mysql_wckey.c
index a90527d0..8623299a 100644
--- a/src/plugins/accounting_storage/mysql/as_mysql_wckey.c
+++ b/src/plugins/accounting_storage/mysql/as_mysql_wckey.c
@@ -523,7 +523,10 @@ extern int as_mysql_add_wckeys(mysql_conn_t *mysql_conn, uint32_t uid,
if (!added_user_list)
added_user_list = list_create(NULL);
- list_append(added_user_list, object->user);
+ if (!list_find_first(added_user_list,
+ slurm_find_char_in_list,
+ object->user))
+ list_append(added_user_list, object->user);
xstrcat(cols, "creation_time, mod_time, user");
xstrfmtcat(vals, "%ld, %ld, '%s'",
now, now, object->user);
@@ -580,7 +583,10 @@ extern int as_mysql_add_wckeys(mysql_conn_t *mysql_conn, uint32_t uid,
continue;
}
- list_append(local_cluster_list, object->cluster);
+ if (!list_find_first(local_cluster_list,
+ slurm_find_char_in_list,
+ object->cluster))
+ list_append(local_cluster_list, object->cluster);
/* we always have a ', ' as the first 2 chars */
tmp_extra = slurm_add_slash_to_quotes(extra+2);
diff --git a/src/plugins/acct_gather_energy/rapl/acct_gather_energy_rapl.c b/src/plugins/acct_gather_energy/rapl/acct_gather_energy_rapl.c
index 247eae24..9160f85e 100644
--- a/src/plugins/acct_gather_energy/rapl/acct_gather_energy_rapl.c
+++ b/src/plugins/acct_gather_energy/rapl/acct_gather_energy_rapl.c
@@ -360,12 +360,13 @@ static void _get_joules_task(acct_gather_energy_t *energy)
(uint64_t)ret - energy->base_consumed_energy;
energy->current_watts =
(uint32_t)ret - energy->previous_consumed_energy;
- energy->ave_watts = ((energy->ave_watts * readings) +
- energy->current_watts) / (readings + 1);
interval = time(NULL) - energy->poll_time;
if (interval) /* Prevent divide by zero */
energy->current_watts /= (float)interval;
+
+ energy->ave_watts = ((energy->ave_watts * readings) +
+ energy->current_watts) / (readings + 1);
} else {
energy->consumed_energy = 1;
energy->base_consumed_energy = (uint64_t)ret;
diff --git a/src/plugins/auth/jwt/auth_jwt.c b/src/plugins/auth/jwt/auth_jwt.c
index ff3b62a1..eef45f35 100644
--- a/src/plugins/auth/jwt/auth_jwt.c
+++ b/src/plugins/auth/jwt/auth_jwt.c
@@ -139,10 +139,11 @@ static int _init_key(void)
/* default to state_save_location for slurmctld */
xstrfmtcat(key_file, "%s/%s",
slurm_conf.state_save_location, default_key);
- }
-
- if (!key_file)
+ } else if (!key_file) {
+ /* Must be in slurmdbd */
+ error("No jwt_key set. Please set the jwt_key=/path/to/key/file option in AuthAltParameters in slurmdbd.conf.");
return ESLURM_AUTH_SKIP;
+ }
debug("%s: Loading key: %s", __func__, key_file);
@@ -259,7 +260,7 @@ int slurm_auth_verify(auth_token_t *cred, char *auth_info)
error("%s: uid_from_string failure", __func__);
goto fail;
}
- if (!validate_slurm_user(uid)) {
+ if ((uid != 0) && (slurm_conf.slurm_user_id != uid)) {
error("%s: attempt to authenticate as alternate user %s from non-SlurmUser %s",
__func__, username, cred->username);
goto fail;
diff --git a/src/plugins/burst_buffer/datawarp/burst_buffer_datawarp.c b/src/plugins/burst_buffer/datawarp/burst_buffer_datawarp.c
index 99eddf1b..995ea122 100644
--- a/src/plugins/burst_buffer/datawarp/burst_buffer_datawarp.c
+++ b/src/plugins/burst_buffer/datawarp/burst_buffer_datawarp.c
@@ -335,6 +335,34 @@ static void _job_queue_del(void *x)
}
}
+static void _set_bb_state(job_record_t *job_ptr, bb_job_t *bb_job,
+ int new_state)
+{
+ const char *new_state_str = NULL;
+
+ xassert(bb_job);
+
+ /*
+ * Set state (integer) in bb_job and set the state (string) in job_ptr.
+ * bb_job is used in this plugin. The string is used to display to the
+ * user and to save the state in StateSaveLocation if slurmctld is
+ * ever restarted.
+ */
+ new_state_str = bb_state_string(new_state);
+ bb_job->state = new_state;
+ if (!job_ptr) {
+ /* This should never happen, but handle it just in case. */
+ error("%s: Could not find job_ptr for JobId=%u, unable to set new burst buffer state %s in job.",
+ __func__, bb_job->job_id, new_state_str);
+ return;
+ }
+
+ log_flag(BURST_BUF, "Modify %pJ burst buffer state from %s to %s",
+ job_ptr, job_ptr->burst_buffer_state, new_state_str);
+ xfree(job_ptr->burst_buffer_state);
+ job_ptr->burst_buffer_state = xstrdup(new_state_str);
+}
+
/* Purge files we have created for the job.
* bb_state.bb_mutex is locked on function entry.
* job_ptr may be NULL if not found */
@@ -405,10 +433,10 @@ static int _alloc_job_bb(job_record_t *job_ptr, bb_job_t *bb_job,
return EAGAIN;
if (bb_job->state < BB_STATE_STAGING_IN) {
- bb_job->state = BB_STATE_STAGING_IN;
+ _set_bb_state(job_ptr, bb_job, BB_STATE_STAGING_IN);
rc = _queue_stage_in(job_ptr, bb_job);
if (rc != SLURM_SUCCESS) {
- bb_job->state = BB_STATE_TEARDOWN;
+ _set_bb_state(job_ptr, bb_job, BB_STATE_TEARDOWN);
_queue_teardown(job_ptr->job_id, job_ptr->user_id,
true);
}
@@ -477,6 +505,7 @@ static bb_job_t *_get_bb_job(job_record_t *job_ptr)
uint64_t tmp_cnt;
int inx;
bb_job_t *bb_job;
+ uint16_t new_bb_state;
if ((job_ptr->burst_buffer == NULL) ||
(job_ptr->burst_buffer[0] == '\0'))
@@ -491,7 +520,9 @@ static bb_job_t *_get_bb_job(job_record_t *job_ptr)
bb_job->partition = xstrdup(job_ptr->part_ptr->name);
if (job_ptr->qos_ptr)
bb_job->qos = xstrdup(job_ptr->qos_ptr->name);
- bb_job->state = BB_STATE_PENDING;
+ new_bb_state = job_ptr->burst_buffer_state ?
+ bb_state_num(job_ptr->burst_buffer_state) : BB_STATE_PENDING;
+ _set_bb_state(job_ptr, bb_job, new_bb_state);
bb_job->user_id = job_ptr->user_id;
bb_specs = xstrdup(job_ptr->burst_buffer);
tok = strtok_r(bb_specs, "\n", &save_ptr);
@@ -890,23 +921,6 @@ static int _open_part_state_file(char **state_file)
return state_fd;
}
-/* Return true if the burst buffer name is that of a job (i.e. numeric) and
- * and that job is complete. Otherwise return false. */
-static bool _is_complete_job(char *name)
-{
- char *end_ptr = NULL;
- uint32_t job_id = 0;
- job_record_t *job_ptr;
-
- if (name && (name[0] >='0') && (name[0] <='9')) {
- job_id = strtol(name, &end_ptr, 10);
- job_ptr = find_job_record(job_id);
- if (!job_ptr || IS_JOB_COMPLETED(job_ptr))
- return true;
- }
- return false;
-}
-
/* Recover saved burst buffer state and use it to preserve account, partition,
* and QOS information for persistent burst buffers. */
static void _recover_bb_state(void)
@@ -978,12 +992,7 @@ static void _recover_bb_state(void)
safe_unpack64(&size, buffer);
}
- if ((bb_state.bb_config.flags & BB_FLAG_EMULATE_CRAY) &&
- _is_complete_job(name)) {
- info("Ignoring burst buffer state for completed job %s",
- name);
- bb_alloc = NULL;
- } else if (bb_state.bb_config.flags & BB_FLAG_EMULATE_CRAY) {
+ if (bb_state.bb_config.flags & BB_FLAG_EMULATE_CRAY) {
bb_alloc = bb_alloc_name_rec(&bb_state, name, user_id);
bb_alloc->id = id;
last_persistent_id = MAX(last_persistent_id, id);
@@ -1453,6 +1462,7 @@ static int _queue_stage_in(job_record_t *job_ptr, bb_job_t *bb_job)
int hash_inx = job_ptr->job_id % 10;
int rc = SLURM_SUCCESS;
pthread_t tid;
+ bb_alloc_t *bb_alloc = NULL;
xstrfmtcat(hash_dir, "%s/hash.%d",
slurm_conf.state_save_location, hash_inx);
@@ -1493,6 +1503,19 @@ static int _queue_stage_in(job_record_t *job_ptr, bb_job_t *bb_job)
#endif
setup_argv[16] = xstrdup(client_nodes_file_nid);
}
+ /*
+ * Create bb allocation for the job now. Check if it has already been
+ * created (perhaps it was created but then slurmctld restarted).
+ * bb_alloc is the structure that is state saved.
+ * If we wait until the _start_stage_in thread to create bb_alloc,
+ * we introduce a race condition where the thread could be killed
+ * (if slurmctld is shut down) before the thread creates
+ * bb_alloc. That race would mean the burst buffer isn't state saved.
+ */
+ if (!(bb_alloc = bb_find_alloc_rec(&bb_state, job_ptr))) {
+ bb_alloc = bb_alloc_job(&bb_state, job_ptr, bb_job);
+ bb_alloc->create_time = time(NULL);
+ }
bb_limit_add(job_ptr->user_id, bb_job->total_size, job_pool, &bb_state,
true);
@@ -1623,7 +1646,6 @@ static void *_start_stage_in(void *x)
track_script_reset_cpid(pthread_self(), 0);
_log_script_argv(setup_argv, resp_msg);
- lock_slurmctld(job_write_lock);
slurm_mutex_lock(&bb_state.bb_mutex);
/*
* The buffer's actual size may be larger than requested by the user.
@@ -1634,42 +1656,34 @@ static void *_start_stage_in(void *x)
&bb_state);
if (!WIFEXITED(status) || (WEXITSTATUS(status) != 0)) {
+ /*
+ * Unlock bb_mutex before locking job_write_lock to avoid
+ * deadlock, since job_write_lock is always locked first.
+ */
+ slurm_mutex_unlock(&bb_state.bb_mutex);
trigger_burst_buffer();
error("setup for JobId=%u status:%u response:%s",
stage_args->job_id, status,
resp_msg);
rc = SLURM_ERROR;
+ lock_slurmctld(job_write_lock);
job_ptr = find_job_record(stage_args->job_id);
if (job_ptr)
_update_system_comment(job_ptr, "setup", resp_msg, 0);
+ unlock_slurmctld(job_write_lock);
} else {
- job_ptr = find_job_record(stage_args->job_id);
bb_job = bb_job_find(&bb_state, stage_args->job_id);
- if (!job_ptr) {
- error("unable to find job record for JobId=%u",
+ if (!bb_job) {
+ error("unable to find bb_job record for JobId=%u",
stage_args->job_id);
rc = SLURM_ERROR;
- } else if (!bb_job) {
- error("unable to find bb_job record for %pJ",
- job_ptr);
- } else {
- bb_job->state = BB_STATE_STAGING_IN;
- bb_alloc = bb_find_alloc_rec(&bb_state, job_ptr);
- if (!bb_alloc && bb_job->total_size) {
- /* Not found (from restart race condtion) and
- * job buffer has non-zero size */
- bb_alloc = bb_alloc_job(&bb_state, job_ptr,
- bb_job);
- bb_limit_add(stage_args->user_id,
- bb_job->total_size,
- stage_args->pool, &bb_state,
- true);
- bb_alloc->create_time = time(NULL);
- }
+ } else if (bb_job->total_size) {
+ /* Restore limit based upon actual size. */
+ bb_limit_add(stage_args->user_id, bb_job->total_size,
+ stage_args->pool, &bb_state, true);
}
+ slurm_mutex_unlock(&bb_state.bb_mutex);
}
- slurm_mutex_unlock(&bb_state.bb_mutex);
- unlock_slurmctld(job_write_lock);
if (rc == SLURM_SUCCESS) {
timeout = bb_state.bb_config.stage_in_timeout * 1000;
@@ -1802,7 +1816,7 @@ static void *_start_stage_in(void *x)
slurm_mutex_lock(&bb_state.bb_mutex);
bb_job = bb_job_find(&bb_state, stage_args->job_id);
if (bb_job)
- bb_job->state = BB_STATE_STAGED_IN;
+ _set_bb_state(job_ptr, bb_job, BB_STATE_STAGED_IN);
if (bb_job && bb_job->total_size) {
if (real_size > bb_job->req_size) {
info("%pJ total_size increased from %"PRIu64" to %"PRIu64,
@@ -1971,7 +1985,7 @@ static void *_start_stage_out(void *x)
slurm_mutex_lock(&bb_state.bb_mutex);
bb_job = _get_bb_job(job_ptr);
if (bb_job)
- bb_job->state = BB_STATE_STAGING_OUT;
+ _set_bb_state(job_ptr, bb_job, BB_STATE_STAGING_OUT);
slurm_mutex_unlock(&bb_state.bb_mutex);
}
unlock_slurmctld(job_write_lock);
@@ -2049,7 +2063,7 @@ static void *_start_stage_out(void *x)
slurm_mutex_lock(&bb_state.bb_mutex);
bb_job = _get_bb_job(job_ptr);
if ((rc == SLURM_SUCCESS) && bb_job)
- bb_job->state = BB_STATE_TEARDOWN;
+ _set_bb_state(job_ptr, bb_job, BB_STATE_TEARDOWN);
bb_alloc = bb_find_alloc_rec(&bb_state, job_ptr);
if (bb_alloc) {
if (rc == SLURM_SUCCESS) {
@@ -2248,7 +2262,8 @@ static void *_start_teardown(void *x)
(void) bb_free_alloc_rec(&bb_state, bb_alloc);
}
if ((bb_job = _get_bb_job(job_ptr)))
- bb_job->state = BB_STATE_COMPLETE;
+ _set_bb_state(job_ptr, bb_job,
+ BB_STATE_COMPLETE);
job_ptr->job_state &= (~JOB_STAGE_OUT);
if (!IS_JOB_PENDING(job_ptr) && /* No email if requeue */
(job_ptr->mail_type & MAIL_JOB_STAGE_OUT)) {
@@ -3185,6 +3200,133 @@ extern int fini(void)
return SLURM_SUCCESS;
}
+static void _pre_queue_stage_out(job_record_t *job_ptr, bb_job_t *bb_job)
+{
+ _set_bb_state(job_ptr, bb_job, BB_STATE_POST_RUN);
+ job_ptr->job_state |= JOB_STAGE_OUT;
+ xfree(job_ptr->state_desc);
+ xstrfmtcat(job_ptr->state_desc, "%s: Stage-out in progress",
+ plugin_type);
+ _queue_stage_out(job_ptr, bb_job);
+}
+
+/*
+ * This function should only be called from _purge_vestigial_bufs().
+ * We need to reset the burst buffer state and restart any threads that may
+ * have been running before slurmctld was shutdown, depending on the state
+ * that the burst buffer is in.
+ */
+static void _recover_job_bb(job_record_t *job_ptr, bb_alloc_t *bb_alloc,
+ time_t defer_time)
+{
+ bb_job_t *bb_job;
+ uint16_t job_bb_state = bb_state_num(job_ptr->burst_buffer_state);
+
+ /*
+ * Call _get_bb_job() to create a cache of the job's burst buffer info,
+ * including the state. Lots of functions will call this so do it now to
+ * create the cache, and we may need to change the burst buffer state.
+ * The job burst buffer state is set in job_ptr and in bb_job.
+ * bb_alloc is used for persistent burst buffers, so bb_alloc->state
+ * isn't used for job burst buffers.
+ */
+ bb_job = _get_bb_job(job_ptr);
+ if (!bb_job) {
+ /* This shouldn't happen. */
+ error("%s: %pJ does not have a burst buffer specification, tearing down vestigial burst buffer.",
+ __func__, job_ptr);
+ _queue_teardown(bb_alloc->job_id, bb_alloc->user_id, false);
+ return;
+ }
+
+ switch(job_bb_state) {
+ /*
+ * First 4 states are specific to persistent burst buffers.
+ * We shouldn't get here since _purge_vestigial_bufs() handles
+ * persistent burst buffers.
+ */
+ case BB_STATE_ALLOCATING:
+ case BB_STATE_ALLOCATED:
+ case BB_STATE_DELETING:
+ case BB_STATE_DELETED:
+ error("%s: Unexpected burst buffer state %s for %pJ",
+ __func__, job_ptr->burst_buffer_state, job_ptr);
+ break;
+ /* Pending states for jobs: */
+ case BB_STATE_STAGING_IN:
+ case BB_STATE_STAGED_IN:
+ case BB_STATE_ALLOC_REVOKE:
+ /*
+ * We do not know the state of file staging,
+ * so teardown the buffer and defer the job
+ * for at least 60 seconds (for the teardown).
+ * Also set the burst buffer state back to PENDING.
+ */
+ log_flag(BURST_BUF, "Purging buffer for pending %pJ",
+ job_ptr);
+ _set_bb_state(job_ptr, bb_job, BB_STATE_TEARDOWN);
+ _queue_teardown(bb_alloc->job_id,
+ bb_alloc->user_id, true);
+ if (job_ptr->details &&
+ (job_ptr->details->begin_time < defer_time)){
+ job_ptr->details->begin_time =
+ defer_time;
+ }
+ break;
+ /* Running states for jobs: */
+ case BB_STATE_PRE_RUN:
+ /*
+ * slurmctld will call bb_g_job_begin() which will
+ * handle burst buffers in this state.
+ */
+ break;
+ case BB_STATE_RUNNING:
+ case BB_STATE_SUSPEND:
+ /* Nothing to do here. */
+ break;
+ /* Post running states for jobs: */
+ case BB_STATE_POST_RUN:
+ case BB_STATE_STAGING_OUT:
+ case BB_STATE_STAGED_OUT:
+ log_flag(BURST_BUF, "Restarting burst buffer stage out for %pJ",
+ job_ptr);
+ /*
+ * _pre_queue_stage_out() sets the burst buffer state
+ * correctly and restarts the needed thread.
+ */
+ _pre_queue_stage_out(job_ptr, bb_job);
+ break;
+ case BB_STATE_TEARDOWN:
+ case BB_STATE_TEARDOWN_FAIL:
+ log_flag(BURST_BUF, "Restarting burst buffer teardown for %pJ",
+ job_ptr);
+ _queue_teardown(bb_alloc->job_id,
+ bb_alloc->user_id, false);
+ break;
+ case BB_STATE_COMPLETE:
+ /*
+ * We shouldn't get here since the bb_alloc record is
+ * removed when the job's bb state is set to
+ * BB_STATE_COMPLETE during teardown.
+ */
+ log_flag(BURST_BUF, "Clearing burst buffer for completed job %pJ",
+ job_ptr);
+ /*
+ * Subtract the space this burst buffer was allocated
+ * since _load_state() calls _apply_limits()
+ * which calls bb_limit_add() for all burst buffers.
+ */
+ bb_limit_rem(bb_alloc->user_id, bb_alloc->size,
+ bb_alloc->pool, &bb_state);
+ (void) bb_free_alloc_rec(&bb_state, bb_alloc);
+ break;
+ default:
+ error("%s: Invalid job burst buffer state %s for %pJ",
+ __func__, job_ptr->burst_buffer_state, job_ptr);
+ break;
+ }
+}
+
/* Identify and purge any vestigial buffers (i.e. we have a job buffer, but
* the matching job is either gone or completed OR we have a job buffer and a
* pending job, but don't know the status of stage-in) */
@@ -3207,19 +3349,8 @@ static void _purge_vestigial_bufs(void)
bb_alloc->job_id);
_queue_teardown(bb_alloc->job_id,
bb_alloc->user_id, false);
- } else if (!IS_JOB_STARTED(job_ptr)) {
- /* We do not know the state of file staging,
- * so teardown the buffer and defer the job
- * for at least 60 seconds (for the teardown) */
- debug("Purging buffer for pending JobId=%u",
- bb_alloc->job_id);
- _queue_teardown(bb_alloc->job_id,
- bb_alloc->user_id, true);
- if (job_ptr->details &&
- (job_ptr->details->begin_time <defer_time)){
- job_ptr->details->begin_time =
- defer_time;
- }
+ } else {
+ _recover_job_bb(job_ptr, bb_alloc, defer_time);
}
bb_alloc = bb_alloc->next;
}
@@ -3835,7 +3966,8 @@ extern int bb_p_job_try_stage_in(List job_queue)
if (bb_job == NULL)
continue;
if (bb_job->state == BB_STATE_COMPLETE)
- bb_job->state = BB_STATE_PENDING; /* job requeued */
+ _set_bb_state(job_ptr, bb_job,
+ BB_STATE_PENDING); /* job requeued */
else if (bb_job->state >= BB_STATE_POST_RUN)
continue; /* Requeued job still staging out */
job_rec = xmalloc(sizeof(bb_job_queue_rec_t));
@@ -3900,7 +4032,8 @@ extern int bb_p_job_test_stage_in(job_record_t *job_ptr, bool test_only)
if (bb_state.last_load_time != 0)
bb_job = _get_bb_job(job_ptr);
if (bb_job && (bb_job->state == BB_STATE_COMPLETE))
- bb_job->state = BB_STATE_PENDING; /* job requeued */
+ _set_bb_state(job_ptr, bb_job,
+ BB_STATE_PENDING); /* job requeued */
if (bb_job == NULL) {
rc = -1;
} else if (bb_job->state < BB_STATE_STAGING_IN) {
@@ -3994,9 +4127,9 @@ extern int bb_p_job_begin(job_record_t *job_ptr)
slurm_conf.state_save_location, hash_inx, job_ptr->job_id);
xstrfmtcat(client_nodes_file_nid, "%s/client_nids", job_dir);
if (do_pre_run)
- bb_job->state = BB_STATE_PRE_RUN;
+ _set_bb_state(job_ptr, bb_job, BB_STATE_PRE_RUN);
else
- bb_job->state = BB_STATE_RUNNING;
+ _set_bb_state(job_ptr, bb_job, BB_STATE_RUNNING);
if (bb_state.bb_config.flags & BB_FLAG_SET_EXEC_HOST)
set_exec_host = true;
else
@@ -4209,7 +4342,8 @@ static void *_start_pre_run(void *x)
if (IS_JOB_RUNNING(job_ptr))
run_kill_job = true;
if (bb_job) {
- bb_job->state = BB_STATE_TEARDOWN;
+ _set_bb_state(job_ptr, bb_job,
+ BB_STATE_TEARDOWN);
if (bb_job->retry_cnt++ > MAX_RETRY_CNT)
hold_job = true;
}
@@ -4219,9 +4353,9 @@ static void *_start_pre_run(void *x)
} else if (bb_job) {
/* Pre-run success and the job's BB record exists */
if (bb_job->state == BB_STATE_ALLOC_REVOKE)
- bb_job->state = BB_STATE_STAGED_IN;
+ _set_bb_state(job_ptr, bb_job, BB_STATE_STAGED_IN);
else
- bb_job->state = BB_STATE_RUNNING;
+ _set_bb_state(job_ptr, bb_job, BB_STATE_RUNNING);
}
if (job_ptr) {
if (run_kill_job)
@@ -4260,9 +4394,9 @@ extern int bb_p_job_revoke_alloc(job_record_t *job_ptr)
bb_job = _get_bb_job(job_ptr);
if (bb_job) {
if (bb_job->state == BB_STATE_RUNNING)
- bb_job->state = BB_STATE_STAGED_IN;
+ _set_bb_state(job_ptr, bb_job, BB_STATE_STAGED_IN);
else if (bb_job->state == BB_STATE_PRE_RUN)
- bb_job->state = BB_STATE_ALLOC_REVOKE;
+ _set_bb_state(job_ptr, bb_job, BB_STATE_ALLOC_REVOKE);
} else {
rc = SLURM_ERROR;
}
@@ -4300,15 +4434,10 @@ extern int bb_p_job_start_stage_out(job_record_t *job_ptr)
job_ptr);
} else if (bb_job->state < BB_STATE_RUNNING) {
/* Job never started. Just teardown the buffer */
- bb_job->state = BB_STATE_TEARDOWN;
+ _set_bb_state(job_ptr, bb_job, BB_STATE_TEARDOWN);
_queue_teardown(job_ptr->job_id, job_ptr->user_id, true);
} else if (bb_job->state < BB_STATE_POST_RUN) {
- bb_job->state = BB_STATE_POST_RUN;
- job_ptr->job_state |= JOB_STAGE_OUT;
- xfree(job_ptr->state_desc);
- xstrfmtcat(job_ptr->state_desc, "%s: Stage-out in progress",
- plugin_type);
- _queue_stage_out(job_ptr, bb_job);
+ _pre_queue_stage_out(job_ptr, bb_job);
}
slurm_mutex_unlock(&bb_state.bb_mutex);
@@ -4436,11 +4565,12 @@ extern int bb_p_job_cancel(job_record_t *job_ptr)
if (!bb_job) {
/* Nothing ever allocated, nothing to clean up */
} else if (bb_job->state == BB_STATE_PENDING) {
- bb_job->state = BB_STATE_COMPLETE; /* Nothing to clean up */
+ _set_bb_state(job_ptr, bb_job,
+ BB_STATE_COMPLETE); /* Nothing to clean up */
} else {
/* Note: Persistent burst buffer actions already completed
* for the job are not reversed */
- bb_job->state = BB_STATE_TEARDOWN;
+ _set_bb_state(job_ptr, bb_job, BB_STATE_TEARDOWN);
bb_alloc = bb_find_alloc_rec(&bb_state, job_ptr);
if (bb_alloc) {
bb_alloc->state = BB_STATE_TEARDOWN;
@@ -4535,7 +4665,7 @@ static int _create_bufs(job_record_t *job_ptr, bb_job_t *bb_job,
}
bb_limit_add(job_ptr->user_id, buf_ptr->size,
buf_ptr->pool, &bb_state, true);
- bb_job->state = BB_STATE_ALLOCATING;
+ _set_bb_state(job_ptr, bb_job, BB_STATE_ALLOCATING);
buf_ptr->state = BB_STATE_ALLOCATING;
create_args = xmalloc(sizeof(create_buf_data_t));
create_args->access = xstrdup(buf_ptr->access);
@@ -4575,7 +4705,7 @@ static int _create_bufs(job_record_t *job_ptr, bb_job_t *bb_job,
continue;
}
- bb_job->state = BB_STATE_DELETING;
+ _set_bb_state(job_ptr, bb_job, BB_STATE_DELETING);
buf_ptr->state = BB_STATE_DELETING;
create_args = xmalloc(sizeof(create_buf_data_t));
create_args->hurry = buf_ptr->hurry;
@@ -4603,7 +4733,8 @@ static int _create_bufs(job_record_t *job_ptr, bb_job_t *bb_job,
job_ptr->user_id,
&bb_state);
if (bb_alloc && (bb_alloc->state == BB_STATE_ALLOCATED))
- bb_job->state = BB_STATE_ALLOCATED;
+ _set_bb_state(job_ptr, bb_job,
+ BB_STATE_ALLOCATED);
else
rc++;
}
@@ -4629,7 +4760,7 @@ static bool _test_persistent_use_ready(bb_job_t *bb_job,
bb_alloc = bb_find_name_rec(buf_ptr->name, job_ptr->user_id,
&bb_state);
if (bb_alloc && (bb_alloc->state == BB_STATE_ALLOCATED)) {
- bb_job->state = BB_STATE_ALLOCATED;
+ _set_bb_state(job_ptr, bb_job, BB_STATE_ALLOCATED);
} else {
not_ready_cnt++;
break;
@@ -4707,10 +4838,11 @@ static void _reset_buf_state(uint32_t user_id, uint32_t job_id, char *name,
break;
}
if (!active_buf) {
+ job_record_t *job_ptr = find_job_record(job_id);
if (bb_job->state == BB_STATE_ALLOCATING)
- bb_job->state = BB_STATE_ALLOCATED;
+ _set_bb_state(job_ptr, bb_job, BB_STATE_ALLOCATED);
else if (bb_job->state == BB_STATE_DELETING)
- bb_job->state = BB_STATE_DELETED;
+ _set_bb_state(job_ptr, bb_job, BB_STATE_DELETED);
queue_job_scheduler();
}
}
@@ -4963,6 +5095,11 @@ static void *_destroy_persistent(void *x)
} else {
assoc_mgr_lock_t assoc_locks =
{ .assoc = READ_LOCK, .qos = READ_LOCK };
+ /*
+ * job_write_lock needed for _reset_buf_state() since it will
+ * call _set_bb_state() to modify job_ptr->burst_buffer_state
+ */
+ lock_slurmctld(job_write_lock);
/* assoc_mgr needs locking to call bb_post_persist_delete */
if (bb_alloc)
assoc_mgr_lock(&assoc_locks);
@@ -4986,6 +5123,7 @@ static void *_destroy_persistent(void *x)
slurm_mutex_unlock(&bb_state.bb_mutex);
if (bb_alloc)
assoc_mgr_unlock(&assoc_locks);
+ unlock_slurmctld(job_write_lock);
}
xfree(resp_msg);
_free_create_args(destroy_args);
diff --git a/src/plugins/job_container/tmpfs/job_container_tmpfs.c b/src/plugins/job_container/tmpfs/job_container_tmpfs.c
index f18f14a6..392af8f6 100644
--- a/src/plugins/job_container/tmpfs/job_container_tmpfs.c
+++ b/src/plugins/job_container/tmpfs/job_container_tmpfs.c
@@ -58,6 +58,9 @@
#include "read_jcconf.h"
+static int _create_ns(uint32_t job_id, bool remount);
+static int _delete_ns(uint32_t job_id);
+
#if defined (__APPLE__)
extern slurmd_conf_t *conf __attribute__((weak_import));
#else
@@ -70,6 +73,8 @@ const uint32_t plugin_version = SLURM_VERSION_NUMBER;
static slurm_jc_conf_t *jc_conf = NULL;
static int step_ns_fd = -1;
+static bool force_rm = true;
+static List running_job_ids = NULL;
static int _create_paths(uint32_t job_id,
char *job_mount,
@@ -124,6 +129,70 @@ static int _create_paths(uint32_t job_id,
return SLURM_SUCCESS;
}
+static int _find_job_id_in_list(uint32_t *list_job_id, uint32_t *job_id)
+{
+ return (*list_job_id == *job_id);
+}
+
+static int _append_job_in_list(void *element, void *arg)
+{
+ step_loc_t *stepd = (step_loc_t *) element;
+ List job_id_list = (List) arg;
+
+ xassert(job_id_list);
+
+ if (!list_find_first(job_id_list, (ListFindF)_find_job_id_in_list,
+ &stepd->step_id.job_id)) {
+ if (stepd_connect(stepd->directory,
+ stepd->nodename,
+ &stepd->step_id,
+ &stepd->protocol_version) != -1)
+ list_append(job_id_list, &stepd->step_id.job_id);
+ }
+
+ return SLURM_SUCCESS;
+}
+
+static int _restore_ns(const char *path, const struct stat *st_buf, int type)
+{
+ int rc = SLURM_SUCCESS;
+ uint32_t job_id;
+ char ns_holder[PATH_MAX];
+ struct stat stat_buf;
+
+ if (type == FTW_NS) {
+ error("%s: Unreachable file of FTW_NS type: %s",
+ __func__, path);
+ rc = SLURM_ERROR;
+ } else if (type == FTW_DNR) {
+ error("%s: Unreadable directory: %s", __func__, path);
+ rc = SLURM_ERROR;
+ } else if (type == FTW_D && xstrcmp(jc_conf->basepath, path)) {
+ /* Lookup for .ns file inside. If exists, try to restore. */
+ if (snprintf(ns_holder, PATH_MAX, "%s/.ns", path) >= PATH_MAX) {
+ error("%s: Unable to build ns_holder path %s: %m",
+ __func__, ns_holder);
+ rc = SLURM_ERROR;
+ } else if (stat(ns_holder, &stat_buf) < 0) {
+ debug3("%s: ignoring wrong ns_holder path %s: %m",
+ __func__, ns_holder);
+ } else {
+ job_id = slurm_atoul(&(xstrrchr(path, '/')[1]));
+ /* At this point we can remount the folder. */
+ if (_create_ns(job_id, true)) {
+ rc = SLURM_ERROR;
+ /* And then, properly delete it for dead jobs. */
+ } else if (!list_find_first(
+ running_job_ids,
+ (ListFindF)_find_job_id_in_list,
+ &job_id)) {
+ rc = _delete_ns(job_id);
+ }
+ }
+ }
+ return rc;
+}
+
extern void container_p_reconfig(void)
{
return;
@@ -151,7 +220,7 @@ extern int init(void)
*/
extern int fini(void)
{
- int rc = 0;
+ int rc = SLURM_SUCCESS;
debug("%s unloaded", plugin_name);
@@ -164,24 +233,24 @@ extern int fini(void)
error("%s: Configuration not loaded", __func__);
return SLURM_ERROR;
}
- rc = umount2(jc_conf->basepath, MNT_DETACH);
- if (rc) {
- error("%s: umount2: %s failed: %s",
- __func__, jc_conf->basepath, strerror(errno));
- return SLURM_ERROR;
- }
- free_jc_conf();
-
if (step_ns_fd != -1) {
close(step_ns_fd);
step_ns_fd = -1;
}
+ if (umount2(jc_conf->basepath, MNT_DETACH)) {
+ error("%s: umount2: %s failed: %s",
+ __func__, jc_conf->basepath, strerror(errno));
+ rc = SLURM_ERROR;
+ }
+ free_jc_conf();
- return SLURM_SUCCESS;
+ return rc;
}
extern int container_p_restore(char *dir_name, bool recover)
{
+ List steps;
+
#ifdef HAVE_NATIVE_CRAY
return SLURM_SUCCESS;
#endif
@@ -237,6 +306,10 @@ extern int container_p_restore(char *dir_name, bool recover)
}
+ /* It could fail if no leaks, it can clean as much leaks as possible. */
+ if (umount2(jc_conf->basepath, MNT_DETACH))
+ debug2("umount2: %s failed: %s", jc_conf->basepath, strerror(errno));
+
#if !defined(__APPLE__) && !defined(__FreeBSD__)
/*
* MS_BIND mountflag would make mount() ignore all other mountflags
@@ -259,6 +332,27 @@ extern int container_p_restore(char *dir_name, bool recover)
#endif
debug3("tmpfs: Base namespace created");
+ steps = stepd_available(conf->spooldir, conf->node_name);
+ running_job_ids = list_create(NULL);
+
+ /* Iterate over steps, and check once per job if it's still running. */
+ (void)list_for_each(steps, _append_job_in_list, running_job_ids);
+ FREE_NULL_LIST(steps);
+
+ /*
+ * Iterate over basepath, restore only the folders that seem bounded to
+ * real jobs (have .ns file). NOTE: Restoring the state could be either
+ * deleting the folder if the job is died and resources are free, or
+ * mount it otherwise.
+ */
+ if (ftw(jc_conf->basepath, _restore_ns, 64)) {
+ error("%s: Directory traversal failed: %s: %s",
+ __func__, jc_conf->basepath, strerror(errno));
+ FREE_NULL_LIST(running_job_ids);
+ return SLURM_ERROR;
+ }
+ FREE_NULL_LIST(running_job_ids);
+
return SLURM_SUCCESS;
}
@@ -307,20 +401,34 @@ static int _mount_private_shm(void)
static int _rm_data(const char *path, const struct stat *st_buf,
int type, struct FTW *ftwbuf)
{
+ int rc = SLURM_SUCCESS;
+
if (remove(path) < 0) {
+ log_level_t log_lvl;
+ if (force_rm) {
+ rc = SLURM_ERROR;
+ log_lvl = LOG_LEVEL_ERROR;
+ } else
+ log_lvl = LOG_LEVEL_DEBUG2;
+
if (type == FTW_NS)
- error("%s: Unreachable file of FTW_NS type: %s",
- __func__, path);
- if (type == FTW_DNR)
- error("%s: Unreadable directory: %s", __func__, path);
- error("%s: could not remove path: %s: %s",
- __func__, path, strerror(errno));
- return errno;
+ log_var(log_lvl,
+ "%s: Unreachable file of FTW_NS type: %s",
+ __func__, path);
+ else if (type == FTW_DNR)
+ log_var(log_lvl,
+ "%s: Unreadable directory: %s",
+ __func__, path);
+
+ log_var(log_lvl,
+ "%s: could not remove path: %s: %s",
+ __func__, path, strerror(errno));
}
- return 0;
+
+ return rc;
}
-extern int container_p_create(uint32_t job_id)
+static int _create_ns(uint32_t job_id, bool remount)
{
char job_mount[PATH_MAX];
char ns_holder[PATH_MAX];
@@ -347,7 +455,7 @@ extern int container_p_create(uint32_t job_id)
error("%s: mkdir %s failed: %s",
__func__, job_mount, strerror(errno));
return -1;
- } else if (rc && errno == EEXIST) {
+ } else if (!remount && rc && errno == EEXIST) {
/* stat to see if .active exists */
struct stat st;
rc = stat(active, &st);
@@ -392,7 +500,7 @@ extern int container_p_create(uint32_t job_id)
}
rc = mkdir(src_bind, 0700);
- if (rc) {
+ if (rc && (!remount || errno != EEXIST)) {
error("%s: mkdir failed %s, %s",
__func__, src_bind, strerror(errno));
goto exit2;
@@ -527,14 +635,10 @@ extern int container_p_create(uint32_t job_id)
goto exit1;
}
- rc = waitpid(cpid, &wstatus, 0);
- if (rc == -1) {
+ if ((waitpid(cpid, &wstatus, 0) != cpid) || WEXITSTATUS(wstatus)) {
error("%s: waitpid failed", __func__);
+ rc = SLURM_ERROR;
goto exit1;
- } else {
- if (rc == cpid)
- debug3("child exited: %d",
- WEXITSTATUS(wstatus));
}
rc = 0;
@@ -549,6 +653,7 @@ exit1:
exit2:
if (rc) {
/* cleanup the job mount */
+ force_rm = true;
if (nftw(job_mount, _rm_data, 64, FTW_DEPTH|FTW_PHYS) < 0) {
error("%s: Directory traversal failed: %s: %s",
__func__, job_mount, strerror(errno));
@@ -560,6 +665,11 @@ exit2:
return rc;
}
+extern int container_p_create(uint32_t job_id)
+{
+ return _create_ns(job_id, false);
+}
+
/* Add a process to a job container, create the proctrack container to add */
extern int container_p_join_external(uint32_t job_id)
{
@@ -667,7 +777,7 @@ extern int container_p_join(uint32_t job_id, uid_t uid)
return SLURM_SUCCESS;
}
-extern int container_p_delete(uint32_t job_id)
+static int _delete_ns(uint32_t job_id)
{
char job_mount[PATH_MAX];
char ns_holder[PATH_MAX];
@@ -698,7 +808,9 @@ extern int container_p_delete(uint32_t job_id)
* Does -
* a post order traversal and delete directory after processing
* contents
+ * NOTE: Can happen EBUSY here so we need to ignore this.
*/
+ force_rm = false;
if (nftw(job_mount, _rm_data, 64, FTW_DEPTH|FTW_PHYS) < 0) {
error("%s: Directory traversal failed: %s: %s",
__func__, job_mount, strerror(errno));
@@ -707,3 +819,8 @@ extern int container_p_delete(uint32_t job_id)
return SLURM_SUCCESS;
}
+
+extern int container_p_delete(uint32_t job_id)
+{
+ return _delete_ns(job_id);
+}
diff --git a/src/plugins/prep/script/prep_script_slurmctld.c b/src/plugins/prep/script/prep_script_slurmctld.c
index 265e0143..78744478 100644
--- a/src/plugins/prep/script/prep_script_slurmctld.c
+++ b/src/plugins/prep/script/prep_script_slurmctld.c
@@ -172,6 +172,8 @@ static char **_build_env(job_record_t *job_ptr, bool is_epilog)
if (job_ptr->spank_job_env_size) {
env_array_merge(&my_env,
(const char **) job_ptr->spank_job_env);
+ valid_spank_job_env(my_env, job_ptr->spank_job_env_size,
+ job_ptr->user_id);
}
setenvf(&my_env, "SLURM_JOB_ACCOUNT", "%s", job_ptr->account);
diff --git a/src/plugins/priority/multifactor/priority_multifactor.c b/src/plugins/priority/multifactor/priority_multifactor.c
index 4e505abf..bf52502f 100644
--- a/src/plugins/priority/multifactor/priority_multifactor.c
+++ b/src/plugins/priority/multifactor/priority_multifactor.c
@@ -872,6 +872,13 @@ static void _handle_tres_run_secs(uint64_t *tres_run_delta,
_handle_qos_tres_run_secs(NULL, tres_run_delta,
job_ptr->job_id, job_ptr->qos_ptr);
+
+ /* Only update partition qos if not being used by job */
+ if (job_ptr->part_ptr &&
+ (job_ptr->part_ptr->qos_ptr != job_ptr->qos_ptr))
+ _handle_qos_tres_run_secs(NULL, tres_run_delta, job_ptr->job_id,
+ job_ptr->part_ptr->qos_ptr);
+
while (assoc) {
_handle_assoc_tres_run_secs(NULL, tres_run_delta,
job_ptr->job_id, assoc);
diff --git a/src/plugins/sched/backfill/backfill.c b/src/plugins/sched/backfill/backfill.c
index 39b4770d..010fb3b9 100644
--- a/src/plugins/sched/backfill/backfill.c
+++ b/src/plugins/sched/backfill/backfill.c
@@ -2502,9 +2502,10 @@ skip_start:
if (is_job_array_head &&
(job_ptr->array_task_id != NO_VAL)) {
/* Try starting next task of job array */
+ job_record_t *tmp = job_ptr;
job_ptr = find_job_record(job_ptr->
array_job_id);
- if (job_ptr &&
+ if (job_ptr && (job_ptr != tmp) &&
IS_JOB_PENDING(job_ptr) &&
(bb_g_job_test_stage_in(
job_ptr, false) == 1))
diff --git a/src/plugins/select/cons_common/dist_tasks.c b/src/plugins/select/cons_common/dist_tasks.c
index 583b4c92..eafbbb1e 100644
--- a/src/plugins/select/cons_common/dist_tasks.c
+++ b/src/plugins/select/cons_common/dist_tasks.c
@@ -211,7 +211,7 @@ static void _clear_spec_cores(job_record_t *job_ptr,
}
/* CPUs already selected for jobs, just distribute the tasks */
-static int _set_task_dist(job_record_t *job_ptr)
+static int _set_task_dist_internal(job_record_t *job_ptr)
{
uint32_t n, i, tid = 0, maxtasks;
uint16_t *avail_cpus;
@@ -334,6 +334,40 @@ static int _set_task_dist(job_record_t *job_ptr)
return SLURM_SUCCESS;
}
+static int _set_task_dist(job_record_t *job_ptr, const uint16_t cr_type)
+{
+ int error_code = _set_task_dist_internal(job_ptr);
+
+ if (error_code != SLURM_SUCCESS)
+ return error_code;
+
+ /*
+ * If we are asking for less threads per core than there are on the node
+ * we need to adjust for that for accounting.
+ * This will be reversed for getting the correct memory in cons_common.c
+ * _job_test() look for 'save_mem & MEM_PER_CPU'.
+ */
+ if (job_ptr->job_resrcs &&
+ (job_ptr->details->mc_ptr->threads_per_core != NO_VAL16) &&
+ ((cr_type & CR_CORE) || (cr_type & CR_SOCKET))) {
+ job_resources_t *job_res = job_ptr->job_resrcs;
+ int i = 0, n_last, n_first = bit_ffs(job_res->node_bitmap);
+
+ if (n_first == -1)
+ return SLURM_ERROR;
+
+ n_last = bit_fls(job_res->node_bitmap);
+ for (int n = n_first; n <= n_last; n++) {
+ if (!bit_test(job_res->node_bitmap, n) ||
+ (job_ptr->details->mc_ptr->threads_per_core ==
+ select_node_record[n].vpus))
+ continue;
+ job_res->cpus[i++] *= select_node_record[n].vpus;
+ }
+ }
+ return SLURM_SUCCESS;
+}
+
/* distribute blocks (planes) of tasks cyclically */
static int _compute_plane_dist(job_record_t *job_ptr,
uint32_t *gres_task_limit)
@@ -1177,7 +1211,7 @@ extern int dist_tasks(job_record_t *job_ptr, const uint16_t cr_type,
* The job has been allocated all non-specialized cores.
* Just set the task distribution for tres_per_task support.
*/
- error_code = _set_task_dist(job_ptr);
+ error_code = _set_task_dist(job_ptr, cr_type);
if (error_code != SLURM_SUCCESS)
return error_code;
return SLURM_SUCCESS;
@@ -1192,7 +1226,8 @@ extern int dist_tasks(job_record_t *job_ptr, const uint16_t cr_type,
* tres_per_task support.
*/
_clear_spec_cores(job_ptr, core_array);
- error_code = _set_task_dist(job_ptr);
+ error_code = _set_task_dist(job_ptr, cr_type);
+
if (error_code != SLURM_SUCCESS)
return error_code;
return SLURM_SUCCESS;
@@ -1233,7 +1268,7 @@ extern int dist_tasks(job_record_t *job_ptr, const uint16_t cr_type,
* by the next code block
*/
if (slurm_conf.select_type_param & CR_CORE_DEFAULT_DIST_BLOCK) {
- switch (job_ptr->details->task_dist & SLURM_DIST_NODEMASK) {
+ switch (job_ptr->details->task_dist & SLURM_DIST_NODESOCKMASK) {
case SLURM_DIST_ARBITRARY:
case SLURM_DIST_BLOCK:
case SLURM_DIST_CYCLIC:
diff --git a/src/plugins/select/cons_common/job_test.c b/src/plugins/select/cons_common/job_test.c
index 8af32a60..837a8c99 100644
--- a/src/plugins/select/cons_common/job_test.c
+++ b/src/plugins/select/cons_common/job_test.c
@@ -255,7 +255,8 @@ static void _set_gpu_defaults(job_record_t *job_ptr)
gres_plugin_job_set_defs(job_ptr->gres_list, "gpu", cpu_per_gpu,
mem_per_gpu, &job_ptr->cpus_per_tres,
- &job_ptr->mem_per_tres);
+ &job_ptr->mem_per_tres,
+ &job_ptr->details->cpus_per_task);
}
/* Determine how many sockets per node this job requires for GRES */
diff --git a/src/plugins/select/cons_res/select_cons_res.c b/src/plugins/select/cons_res/select_cons_res.c
index e9d6af90..ad185bde 100644
--- a/src/plugins/select/cons_res/select_cons_res.c
+++ b/src/plugins/select/cons_res/select_cons_res.c
@@ -150,7 +150,7 @@ static void _spec_core_filter(bitstr_t *node_bitmap, bitstr_t **core_bitmap)
xassert(core_bitmap);
if (*core_bitmap) {
- core_array_and_not(core_bitmap, p_spec_core_map);
+ core_array_and(core_bitmap, p_spec_core_map);
} else {
bit_not(*p_spec_core_map);
*core_bitmap = *p_spec_core_map;
diff --git a/src/plugins/select/cons_tres/job_test.c b/src/plugins/select/cons_tres/job_test.c
index 2bb09ede..a5aee966 100644
--- a/src/plugins/select/cons_tres/job_test.c
+++ b/src/plugins/select/cons_tres/job_test.c
@@ -43,7 +43,7 @@
typedef struct node_weight_struct {
bitstr_t *node_bitmap; /* bitmap of nodes with this weight */
- uint32_t weight; /* priority of node for scheduling work on */
+ uint64_t weight; /* priority of node for scheduling work on */
} node_weight_type;
typedef struct topo_weight_info {
@@ -110,8 +110,8 @@ static int _node_weight_sort(void *x, void *y);
static int _node_weight_find(void *x, void *key)
{
node_weight_type *nwt = (node_weight_type *) x;
- config_record_t *config_ptr = (config_record_t *) key;
- if (nwt->weight == config_ptr->weight)
+ node_record_t *node_ptr = (node_record_t *) key;
+ if (nwt->weight == node_ptr->sched_weight)
return 1;
return 0;
}
@@ -158,12 +158,11 @@ static List _build_node_weight_list(bitstr_t *node_bitmap)
if (!bit_test(node_bitmap, i))
continue;
node_ptr = node_record_table_ptr + i;
- nwt = list_find_first(node_list, _node_weight_find,
- node_ptr->config_ptr);
+ nwt = list_find_first(node_list, _node_weight_find, node_ptr);
if (!nwt) {
nwt = xmalloc(sizeof(node_weight_type));
nwt->node_bitmap = bit_alloc(select_node_cnt);
- nwt->weight = node_ptr->config_ptr->weight;
+ nwt->weight = node_ptr->sched_weight;
list_append(node_list, nwt);
}
bit_set(nwt->node_bitmap, i);
@@ -1873,18 +1872,6 @@ static int _eval_nodes_dfly(job_record_t *job_ptr,
}
}
- /* count up leaf switches */
- if (!req_nodes_bitmap) {
- for (i = 0, switch_ptr = switch_record_table;
- i < switch_record_cnt; i++, switch_ptr++) {
- if (switch_record_table[i].level != 0)
- continue;
- if (bit_overlap_any(switch_node_bitmap[i],
- best_nodes_bitmap))
- leaf_switch_count++;
- }
- }
-
if (req_nodes_bitmap &&
(!bit_super_set(req_nodes_bitmap, avail_nodes_bitmap))) {
info("%pJ requires nodes not available on any switch",
@@ -1993,25 +1980,6 @@ static int _eval_nodes_dfly(job_record_t *job_ptr,
}
}
- if (job_ptr->req_switch > 0) {
- if (time_waiting >= job_ptr->wait4switch) {
- job_ptr->best_switch = true;
- debug3("%pJ waited %ld sec for switches use=%d",
- job_ptr, time_waiting, leaf_switch_count);
- } else if (leaf_switch_count > job_ptr->req_switch) {
- /*
- * Allocation is for more than requested number of
- * switches.
- */
- job_ptr->best_switch = false;
- debug3("%pJ waited %ld sec for switches=%u found=%d wait %u",
- job_ptr, time_waiting, job_ptr->req_switch,
- leaf_switch_count, job_ptr->wait4switch);
- } else {
- job_ptr->best_switch = true;
- }
- }
-
/*
* Add additional resources as required from additional leaf switches
* on a round-robin basis
@@ -2076,7 +2044,39 @@ static int _eval_nodes_dfly(job_record_t *job_ptr,
}
rc = SLURM_ERROR;
-fini: FREE_NULL_LIST(best_gres);
+fini:
+ if ((job_ptr->req_switch > 0) && (rc == SLURM_SUCCESS) &&
+ switch_node_bitmap) {
+ /* req_switch == 1 here; enforced at the top of the function. */
+ leaf_switch_count = 0;
+
+ /* count up leaf switches */
+ for (i = 0, switch_ptr = switch_record_table;
+ i < switch_record_cnt; i++, switch_ptr++) {
+ if (switch_record_table[i].level != 0)
+ continue;
+ if (bit_overlap_any(switch_node_bitmap[i], node_map))
+ leaf_switch_count++;
+ }
+ if (time_waiting >= job_ptr->wait4switch) {
+ job_ptr->best_switch = true;
+ debug3("%pJ waited %ld sec for switches use=%d",
+ job_ptr, time_waiting, leaf_switch_count);
+ } else if (leaf_switch_count > job_ptr->req_switch) {
+ /*
+ * Allocation is for more than requested number of
+ * switches.
+ */
+ job_ptr->best_switch = false;
+ debug3("%pJ waited %ld sec for switches=%u found=%d wait %u",
+ job_ptr, time_waiting, job_ptr->req_switch,
+ leaf_switch_count, job_ptr->wait4switch);
+ } else {
+ job_ptr->best_switch = true;
+ }
+ }
+
+ FREE_NULL_LIST(best_gres);
FREE_NULL_LIST(node_weight_list);
FREE_NULL_BITMAP(avail_nodes_bitmap);
FREE_NULL_BITMAP(req_nodes_bitmap);
diff --git a/src/sacct/options.c b/src/sacct/options.c
index c8f8e6e6..cd84fd84 100644
--- a/src/sacct/options.c
+++ b/src/sacct/options.c
@@ -686,6 +686,7 @@ extern void parse_command_line(int argc, char **argv)
bool brief_output = false, long_output = false;
bool all_users = false;
bool all_clusters = false;
+ char *qos_names = NULL;
slurmdb_job_cond_t *job_cond = params.job_cond;
log_options_t opts = LOG_OPTS_STDERR_ONLY ;
int verbosity; /* count of -v options */
@@ -934,21 +935,7 @@ extern void parse_command_line(int argc, char **argv)
PRINT_FIELDS_PARSABLE_NO_ENDING;
break;
case 'q':
- if (!g_qos_list) {
- slurmdb_qos_cond_t qos_cond;
- memset(&qos_cond, 0,
- sizeof(slurmdb_qos_cond_t));
- qos_cond.with_deleted = 1;
- g_qos_list = slurmdb_qos_get(
- acct_db_conn, &qos_cond);
- }
-
- if (!job_cond->qos_list)
- job_cond->qos_list = list_create(xfree_ptr);
-
- if (!slurmdb_addto_qos_char_list(job_cond->qos_list,
- g_qos_list, optarg, 0))
- fatal("problem processing qos list");
+ qos_names = xstrdup(optarg);
break;
case 'r':
if (!job_cond->partition_list)
@@ -1138,6 +1125,26 @@ extern void parse_command_line(int argc, char **argv)
}
}
+ if (qos_names) {
+ if (!g_qos_list) {
+ slurmdb_qos_cond_t qos_cond;
+ memset(&qos_cond, 0,
+ sizeof(slurmdb_qos_cond_t));
+ qos_cond.with_deleted = 1;
+ g_qos_list = slurmdb_qos_get(
+ acct_db_conn, &qos_cond);
+ }
+
+ if (!job_cond->qos_list)
+ job_cond->qos_list = list_create(xfree_ptr);
+
+ if (!slurmdb_addto_qos_char_list(job_cond->qos_list,
+ g_qos_list, qos_names, 0))
+ fatal("problem processing qos list");
+ xfree(qos_names);
+ }
+
+
/* specific clusters requested? */
if (params.opt_federation && !all_clusters && !job_cond->cluster_list &&
!params.opt_local) {
diff --git a/src/salloc/opt.c b/src/salloc/opt.c
index 6f139444..a4044fe8 100644
--- a/src/salloc/opt.c
+++ b/src/salloc/opt.c
@@ -784,7 +784,7 @@ static void _usage(void)
{
printf(
"Usage: salloc [-N numnodes|[min nodes]-[max nodes]] [-n num-processors]\n"
-" [[-c cpus-per-node] [-r n] [-p partition] [--hold] [-t minutes]\n"
+" [-c cpus-per-node] [-r n] [-p partition] [--hold] [-t minutes]\n"
" [--immediate[=secs]] [--no-kill] [--overcommit] [-D path]\n"
" [--oversubscribe] [-J jobname]\n"
" [--verbose] [--gid=group] [--uid=user] [--licenses=names]\n"
@@ -797,13 +797,13 @@ static void _usage(void)
" [--network=type] [--mem-per-cpu=MB] [--qos=qos]\n"
" [--mem-bind=...] [--reservation=name] [--mcs-label=mcs]\n"
" [--time-min=minutes] [--gres=list] [--gres-flags=opts]\n"
-" [--cpu-freq=min[-max[:gov]] [--power=flags] [--profile=...]\n"
+" [--cpu-freq=min[-max[:gov]]] [--power=flags] [--profile=...]\n"
" [--switches=max-switches[@max-time-to-wait]]\n"
" [--core-spec=cores] [--thread-spec=threads] [--reboot]\n"
" [--bb=burst_buffer_spec] [--bbf=burst_buffer_file]\n"
" [--delay-boot=mins] [--use-min-nodes]\n"
" [--cpus-per-gpu=n] [--gpus=n] [--gpu-bind=...] [--gpu-freq=...]\n"
-" [--gpus-per-node=n] [--gpus-per-socket=n] [--gpus-per-task=n]\n"
+" [--gpus-per-node=n] [--gpus-per-socket=n] [--gpus-per-task=n]\n"
" [--mem-per-gpu=MB]\n"
" [command [args...]]\n");
}
diff --git a/src/sbatch/opt.c b/src/sbatch/opt.c
index ced1dc67..b4d7f70e 100644
--- a/src/sbatch/opt.c
+++ b/src/sbatch/opt.c
@@ -1108,16 +1108,16 @@ static void _usage(void)
" [-D path] [--no-kill] [--overcommit]\n"
" [--input file] [--output file] [--error file]\n"
" [--time-min=minutes] [--licenses=names] [--clusters=cluster_names]\n"
-" [--chdir=directory] [--oversubscibe] [-m dist] [-J jobname]\n"
+" [--chdir=directory] [--oversubscribe] [-m dist] [-J jobname]\n"
" [--verbose] [--gid=group] [--uid=user]\n"
" [--contiguous] [--mincpus=n] [--mem=MB] [--tmp=MB] [-C list]\n"
" [--account=name] [--dependency=type:jobid[+time]] [--comment=name]\n"
-" [--mail-type=type] [--mail-user=user][--nice[=value]] [--wait]\n"
+" [--mail-type=type] [--mail-user=user] [--nice[=value]] [--wait]\n"
" [--requeue] [--no-requeue] [--ntasks-per-node=n] [--propagate]\n"
" [--nodefile=file] [--nodelist=hosts] [--exclude=hosts]\n"
" [--network=type] [--mem-per-cpu=MB] [--qos=qos] [--gres=list]\n"
" [--mem-bind=...] [--reservation=name] [--mcs-label=mcs]\n"
-" [--cpu-freq=min[-max[:gov]] [--power=flags] [--gres-flags=opts]\n"
+" [--cpu-freq=min[-max[:gov]]] [--power=flags] [--gres-flags=opts]\n"
" [--switches=max-switches{@max-time-to-wait}] [--reboot]\n"
" [--core-spec=cores] [--thread-spec=threads]\n"
" [--bb=burst_buffer_spec] [--bbf=burst_buffer_file]\n"
@@ -1125,7 +1125,7 @@ static void _usage(void)
" [--export[=names]] [--export-file=file|fd] [--delay-boot=mins]\n"
" [--use-min-nodes]\n"
" [--cpus-per-gpu=n] [--gpus=n] [--gpu-bind=...] [--gpu-freq=...]\n"
-" [--gpus-per-node=n] [--gpus-per-socket=n] [--gpus-per-task=n]\n"
+" [--gpus-per-node=n] [--gpus-per-socket=n] [--gpus-per-task=n]\n"
" [--mem-per-gpu=MB]\n"
" executable [args...]\n");
}
diff --git a/src/sdiag/opts.c b/src/sdiag/opts.c
index 3af34bc3..33356fca 100644
--- a/src/sdiag/opts.c
+++ b/src/sdiag/opts.c
@@ -147,7 +147,7 @@ extern void parse_command_line(int argc, char **argv)
static void _usage( void )
{
- printf("Usage: sdiag [-M cluster] [-aritT] \n");
+ printf("Usage: sdiag [-M cluster] [-aritT]\n");
}
static void _help( void )
diff --git a/src/slurmctld/acct_policy.c b/src/slurmctld/acct_policy.c
index da6a1145..69fff642 100644
--- a/src/slurmctld/acct_policy.c
+++ b/src/slurmctld/acct_policy.c
@@ -1050,10 +1050,21 @@ static void _qos_alter_job(job_record_t *job_ptr,
for (i=0; i<slurmctld_tres_cnt; i++) {
if (used_tres_run_secs[i] == new_used_tres_run_secs[i])
continue;
- qos_ptr->usage->grp_used_tres_run_secs[i] -=
- used_tres_run_secs[i];
- qos_ptr->usage->grp_used_tres_run_secs[i] +=
+ /*
+ * Handle the case when remaining usage is less than
+ * the original job request.
+ */
+ int64_t used_tres_run_sec_decr =
+ used_tres_run_secs[i] -
new_used_tres_run_secs[i];
+ if ((used_tres_run_sec_decr < 0) ||
+ (used_tres_run_sec_decr <
+ qos_ptr->usage->grp_used_tres_run_secs[i]))
+ qos_ptr->usage->grp_used_tres_run_secs[i] -=
+ used_tres_run_sec_decr;
+ else
+ qos_ptr->usage->grp_used_tres_run_secs[i] = 0;
+
debug2("altering %pJ QOS %s got %"PRIu64" just removed %"PRIu64" and added %"PRIu64,
job_ptr, qos_ptr->name,
qos_ptr->usage->grp_used_tres_run_secs[i],
@@ -1410,7 +1421,7 @@ static acct_policy_tres_usage_t _validate_tres_usage_limits(
if (curr_usage)
usage = curr_usage[i];
- if (tres_usage &&
+ if (tres_usage && tres_req_cnt[i] &&
((tres_req_cnt[i] + tres_usage[i]) >
(tres_limit_array[i] - usage)))
return TRES_USAGE_REQ_NOT_SAFE_WITH_USAGE;
@@ -2681,10 +2692,21 @@ extern void acct_policy_alter_job(job_record_t *job_ptr,
for (i=0; i<slurmctld_tres_cnt; i++) {
if (used_tres_run_secs[i] == new_used_tres_run_secs[i])
continue;
- assoc_ptr->usage->grp_used_tres_run_secs[i] -=
- used_tres_run_secs[i];
- assoc_ptr->usage->grp_used_tres_run_secs[i] +=
+ /*
+ * Handle the case when remaining usage is less than
+ * the original job request.
+ */
+ int64_t used_tres_run_sec_decr =
+ used_tres_run_secs[i] -
new_used_tres_run_secs[i];
+ if ((used_tres_run_sec_decr < 0) ||
+ (used_tres_run_sec_decr <
+ assoc_ptr->usage->grp_used_tres_run_secs[i]))
+ assoc_ptr->usage->grp_used_tres_run_secs[i] -=
+ used_tres_run_sec_decr;
+ else
+ assoc_ptr->usage->grp_used_tres_run_secs[i] = 0;
+
debug2("altering %pJ assoc %u(%s/%s/%s) got %"PRIu64" just removed %"PRIu64" and added %"PRIu64,
job_ptr, assoc_ptr->id, assoc_ptr->acct,
assoc_ptr->user, assoc_ptr->partition,
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index 82d10686..a39bd134 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -139,10 +139,12 @@ typedef struct {
typedef struct {
Buf buffer;
uint32_t filter_uid;
+ bool has_qos_lock;
uint32_t *jobs_packed;
uint16_t protocol_version;
uint16_t show_flags;
uid_t uid;
+ slurmdb_user_rec_t user_rec;
} _foreach_pack_job_info_t;
typedef struct {
@@ -4632,6 +4634,7 @@ extern job_record_t *job_array_split(job_record_t *job_ptr)
job_ptr_pend->node_bitmap_cg = NULL;
job_ptr_pend->nodes = NULL;
job_ptr_pend->nodes_completing = NULL;
+ job_ptr_pend->origin_cluster = xstrdup(job_ptr->origin_cluster);
job_ptr_pend->partition = xstrdup(job_ptr->partition);
job_ptr_pend->part_ptr_list = part_list_copy(job_ptr->part_ptr_list);
/* On jobs that are held the priority_array isn't set up yet,
@@ -8565,6 +8568,8 @@ static bool _valid_pn_min_mem(job_desc_msg_t *job_desc_msg,
(job_desc_msg->max_cpus < job_desc_msg->min_cpus)) {
job_desc_msg->max_cpus = job_desc_msg->min_cpus;
}
+ } else {
+ job_desc_msg->pn_min_cpus = job_desc_msg->cpus_per_task;
}
return true;
}
@@ -9563,6 +9568,7 @@ static void _list_delete_job(void *job_entry)
xfree(job_ptr->batch_features);
xfree(job_ptr->batch_host);
xfree(job_ptr->burst_buffer);
+ xfree(job_ptr->burst_buffer_state);
xfree(job_ptr->comment);
xfree(job_ptr->clusters);
xfree(job_ptr->cpus_per_tres);
@@ -9751,7 +9757,7 @@ end_it:
}
/* Determine if ALL partitions associated with a job are hidden */
-static bool _all_parts_hidden(job_record_t *job_ptr, uid_t uid)
+static bool _all_parts_hidden(job_record_t *job_ptr, slurmdb_user_rec_t *user)
{
bool rc;
ListIterator part_iterator;
@@ -9761,7 +9767,7 @@ static bool _all_parts_hidden(job_record_t *job_ptr, uid_t uid)
rc = true;
part_iterator = list_iterator_create(job_ptr->part_ptr_list);
while ((part_ptr = list_next(part_iterator))) {
- if (part_is_visible(part_ptr, uid)) {
+ if (part_is_visible_user_rec(part_ptr, user)) {
rc = false;
break;
}
@@ -9770,7 +9776,8 @@ static bool _all_parts_hidden(job_record_t *job_ptr, uid_t uid)
return rc;
}
- if (job_ptr->part_ptr && part_is_visible(job_ptr->part_ptr, uid))
+ if (job_ptr->part_ptr &&
+ part_is_visible_user_rec(job_ptr->part_ptr, user))
return false;
return true;
}
@@ -9792,6 +9799,25 @@ static bool _hide_job(job_record_t *job_ptr, uid_t uid, uint16_t show_flags)
return false;
}
+/* Determine if a given job should be seen by a specific user */
+static bool _hide_job_user_rec(job_record_t *job_ptr, slurmdb_user_rec_t *user,
+ uint16_t show_flags)
+{
+ if (!(show_flags & SHOW_ALL) && IS_JOB_REVOKED(job_ptr))
+ return true;
+
+ if ((slurm_conf.private_data & PRIVATE_DATA_JOBS) &&
+ (job_ptr->user_id != user->uid) &&
+ !validate_operator_user_rec(user) &&
+ (((slurm_mcs_get_privatedata() == 0) &&
+ !assoc_mgr_is_user_acct_coord_user_rec(acct_db_conn, user,
+ job_ptr->account)) ||
+ ((slurm_mcs_get_privatedata() == 1) &&
+ (mcs_g_check_mcs_label(user->uid, job_ptr->mcs_label) != 0))))
+ return true;
+ return false;
+}
+
static int _pack_job(void *object, void *arg)
{
job_record_t *job_ptr = (job_record_t *)object;
@@ -9805,14 +9831,16 @@ static int _pack_job(void *object, void *arg)
if (((pack_info->show_flags & SHOW_ALL) == 0) &&
(pack_info->uid != 0) &&
- _all_parts_hidden(job_ptr, pack_info->uid))
+ _all_parts_hidden(job_ptr, &pack_info->user_rec))
return SLURM_SUCCESS;
- if (_hide_job(job_ptr, pack_info->uid, pack_info->show_flags))
+ if (_hide_job_user_rec(job_ptr, &pack_info->user_rec,
+ pack_info->show_flags))
return SLURM_SUCCESS;
pack_job(job_ptr, pack_info->show_flags, pack_info->buffer,
- pack_info->protocol_version, pack_info->uid);
+ pack_info->protocol_version, pack_info->uid,
+ pack_info->has_qos_lock);
(*pack_info->jobs_packed)++;
@@ -9851,6 +9879,7 @@ extern void pack_all_jobs(char **buffer_ptr, int *buffer_size,
uint32_t jobs_packed = 0, tmp_offset;
_foreach_pack_job_info_t pack_info = {0};
Buf buffer;
+ assoc_mgr_lock_t locks = { .user = READ_LOCK, .qos = READ_LOCK };
buffer_ptr[0] = NULL;
*buffer_size = 0;
@@ -9869,8 +9898,14 @@ extern void pack_all_jobs(char **buffer_ptr, int *buffer_size,
pack_info.protocol_version = protocol_version;
pack_info.show_flags = show_flags;
pack_info.uid = uid;
+ pack_info.has_qos_lock = true;
+ pack_info.user_rec.uid = uid;
+ assoc_mgr_lock(&locks);
+ assoc_mgr_fill_in_user(acct_db_conn, &pack_info.user_rec,
+ accounting_enforce, NULL, true);
list_for_each(job_list, _pack_job, &pack_info);
+ assoc_mgr_unlock(&locks);
/* put the real record count in the message body header */
tmp_offset = get_buf_offset(buffer);
@@ -9903,6 +9938,7 @@ extern void pack_spec_jobs(char **buffer_ptr, int *buffer_size, List job_ids,
uint32_t jobs_packed = 0, tmp_offset;
_foreach_pack_job_info_t pack_info = {0};
Buf buffer;
+ assoc_mgr_lock_t locks = { .user = READ_LOCK, .qos = READ_LOCK };
xassert(job_ids);
@@ -9923,8 +9959,14 @@ extern void pack_spec_jobs(char **buffer_ptr, int *buffer_size, List job_ids,
pack_info.protocol_version = protocol_version;
pack_info.show_flags = show_flags;
pack_info.uid = uid;
+ pack_info.has_qos_lock = true;
+ pack_info.user_rec.uid = uid;
+ assoc_mgr_lock(&locks);
+ assoc_mgr_fill_in_user(acct_db_conn, &pack_info.user_rec,
+ accounting_enforce, NULL, true);
list_for_each(job_ids, _foreach_pack_jobid, &pack_info);
+ assoc_mgr_unlock(&locks);
/* put the real record count in the message body header */
tmp_offset = get_buf_offset(buffer);
@@ -9947,7 +9989,7 @@ static int _pack_het_job(job_record_t *job_ptr, uint16_t show_flags,
while ((het_job_ptr = list_next(iter))) {
if (het_job_ptr->het_job_id == job_ptr->het_job_id) {
pack_job(het_job_ptr, show_flags, buffer,
- protocol_version, uid);
+ protocol_version, uid, false);
job_cnt++;
} else {
error("%s: Bad het_job_list for %pJ",
@@ -10002,7 +10044,7 @@ extern int pack_one_job(char **buffer_ptr, int *buffer_size,
/* Pack regular (not array) job */
if (!_hide_job(job_ptr, uid, show_flags)) {
pack_job(job_ptr, show_flags, buffer, protocol_version,
- uid);
+ uid, false);
jobs_packed++;
}
} else {
@@ -10013,7 +10055,7 @@ extern int pack_one_job(char **buffer_ptr, int *buffer_size,
packed_head = true;
if (!_hide_job(job_ptr, uid, show_flags)) {
pack_job(job_ptr, show_flags, buffer,
- protocol_version, uid);
+ protocol_version, uid, false);
jobs_packed++;
}
}
@@ -10026,7 +10068,7 @@ extern int pack_one_job(char **buffer_ptr, int *buffer_size,
if (_hide_job(job_ptr, uid, show_flags))
break;
pack_job(job_ptr, show_flags, buffer,
- protocol_version, uid);
+ protocol_version, uid, false);
jobs_packed++;
}
job_ptr = job_ptr->job_array_next_j;
@@ -10075,13 +10117,14 @@ static void _pack_job_gres(job_record_t *dump_job_ptr, Buf buffer,
* whenever the data format changes
*/
void pack_job(job_record_t *dump_job_ptr, uint16_t show_flags, Buf buffer,
- uint16_t protocol_version, uid_t uid)
+ uint16_t protocol_version, uid_t uid, bool has_qos_lock)
{
struct job_details *detail_ptr;
time_t accrue_time = 0, begin_time = 0, start_time = 0, end_time = 0;
uint32_t time_limit;
char *nodelist = NULL;
assoc_mgr_lock_t locks = { .qos = READ_LOCK };
+ xassert(!has_qos_lock || verify_assoc_lock(QOS_LOCK, READ_LOCK));
if (protocol_version >= SLURM_20_02_PROTOCOL_VERSION) {
detail_ptr = dump_job_ptr->details;
@@ -10210,7 +10253,8 @@ void pack_job(job_record_t *dump_job_ptr, uint16_t show_flags, Buf buffer,
packstr(dump_job_ptr->burst_buffer_state, buffer);
packstr(dump_job_ptr->system_comment, buffer);
- assoc_mgr_lock(&locks);
+ if (!has_qos_lock)
+ assoc_mgr_lock(&locks);
if (dump_job_ptr->qos_ptr)
packstr(dump_job_ptr->qos_ptr->name, buffer);
else {
@@ -10231,7 +10275,8 @@ void pack_job(job_record_t *dump_job_ptr, uint16_t show_flags, Buf buffer,
} else {
pack_time(0, buffer);
}
- assoc_mgr_unlock(&locks);
+ if (!has_qos_lock)
+ assoc_mgr_unlock(&locks);
packstr(dump_job_ptr->licenses, buffer);
packstr(dump_job_ptr->state_desc, buffer);
@@ -10439,7 +10484,8 @@ void pack_job(job_record_t *dump_job_ptr, uint16_t show_flags, Buf buffer,
packstr(dump_job_ptr->burst_buffer_state, buffer);
packstr(dump_job_ptr->system_comment, buffer);
- assoc_mgr_lock(&locks);
+ if (!has_qos_lock)
+ assoc_mgr_lock(&locks);
if (dump_job_ptr->qos_ptr)
packstr(dump_job_ptr->qos_ptr->name, buffer);
else {
@@ -10460,7 +10506,8 @@ void pack_job(job_record_t *dump_job_ptr, uint16_t show_flags, Buf buffer,
} else {
pack_time(0, buffer);
}
- assoc_mgr_unlock(&locks);
+ if (!has_qos_lock)
+ assoc_mgr_unlock(&locks);
packstr(dump_job_ptr->licenses, buffer);
packstr(dump_job_ptr->state_desc, buffer);
@@ -10723,8 +10770,13 @@ static void _pack_default_job_details(job_record_t *job_ptr, Buf buffer,
pack32(detail_ptr->num_tasks, buffer);
else if (IS_JOB_PENDING(job_ptr))
pack32(detail_ptr->min_nodes, buffer);
+ else if (job_ptr->tres_alloc_cnt)
+ pack32((uint32_t)
+ job_ptr->tres_alloc_cnt[TRES_ARRAY_NODE],
+ buffer);
else
pack32(job_ptr->node_cnt, buffer);
+
pack16(shared, buffer);
pack32(detail_ptr->cpu_freq_min, buffer);
pack32(detail_ptr->cpu_freq_max, buffer);
diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c
index 65977071..dabf2632 100644
--- a/src/slurmctld/job_scheduler.c
+++ b/src/slurmctld/job_scheduler.c
@@ -1842,8 +1842,10 @@ skip_start:
if (is_job_array_head &&
(job_ptr->array_task_id != NO_VAL)) {
/* Try starting another task of the job array */
+ job_record_t *tmp = job_ptr;
job_ptr = find_job_record(job_ptr->array_job_id);
- if (job_ptr && IS_JOB_PENDING(job_ptr) &&
+ if (job_ptr && (job_ptr != tmp) &&
+ IS_JOB_PENDING(job_ptr) &&
(bb_g_job_test_stage_in(job_ptr,false) ==1))
goto next_task;
}
diff --git a/src/slurmctld/partition_mgr.c b/src/slurmctld/partition_mgr.c
index edc13a70..63b20a84 100644
--- a/src/slurmctld/partition_mgr.c
+++ b/src/slurmctld/partition_mgr.c
@@ -1060,6 +1060,22 @@ extern bool part_is_visible(part_record_t *part_ptr, uid_t uid)
return true;
}
+/* partition is visible to the user */
+extern bool part_is_visible_user_rec(part_record_t *part_ptr,
+ slurmdb_user_rec_t *user)
+{
+ xassert(verify_lock(PART_LOCK, READ_LOCK));
+
+ if (validate_operator_user_rec(user))
+ return true;
+ if (part_ptr->flags & PART_FLAG_HIDDEN)
+ return false;
+ if (!validate_group(part_ptr, user->uid))
+ return false;
+
+ return true;
+}
+
/*
* pack_all_part - dump all partition information for all partitions in
* machine independent form (for network transmission)
diff --git a/src/slurmctld/prep_slurmctld.c b/src/slurmctld/prep_slurmctld.c
index fe37ef67..348b9290 100644
--- a/src/slurmctld/prep_slurmctld.c
+++ b/src/slurmctld/prep_slurmctld.c
@@ -68,7 +68,7 @@ extern void prep_prolog_slurmctld_callback(int rc, uint32_t job_id)
if (job_ptr->prep_prolog_cnt) {
debug2("%s: still %u async prologs left to complete",
__func__, job_ptr->prep_prolog_cnt);
- lock_slurmctld(job_write_lock);
+ unlock_slurmctld(job_write_lock);
return;
}
@@ -128,7 +128,7 @@ extern void prep_epilog_slurmctld_callback(int rc, uint32_t job_id)
if (job_ptr->prep_epilog_cnt) {
debug2("%s: still %u async epilogs left to complete",
__func__, job_ptr->prep_epilog_cnt);
- lock_slurmctld(job_write_lock);
+ unlock_slurmctld(job_write_lock);
return;
}
diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c
index eb8cb5dc..ed501fd5 100644
--- a/src/slurmctld/proc_req.c
+++ b/src/slurmctld/proc_req.c
@@ -590,6 +590,21 @@ extern bool validate_operator(uid_t uid)
return false;
}
+extern bool validate_operator_user_rec(slurmdb_user_rec_t *user)
+{
+#ifndef NDEBUG
+ if (drop_priv)
+ return false;
+#endif
+ if ((user->uid == 0) ||
+ (user->uid == slurm_conf.slurm_user_id) ||
+ (user->admin_level >= SLURMDB_ADMIN_OPERATOR))
+ return true;
+ else
+ return false;
+
+}
+
static void _set_hostname(slurm_msg_t *msg, char **alloc_node)
{
slurm_addr_t addr;
diff --git a/src/slurmctld/read_config.c b/src/slurmctld/read_config.c
index 5ed07c3f..67e309ed 100644
--- a/src/slurmctld/read_config.c
+++ b/src/slurmctld/read_config.c
@@ -1331,6 +1331,16 @@ int read_slurm_conf(int recover, bool reconfig)
_gres_reconfig(reconfig);
reset_job_bitmaps(); /* must follow select_g_job_init() */
+ /*
+ * The burst buffer plugin must be initialized and state loaded before
+ * _sync_nodes_to_jobs(), which calls bb_g_job_init().
+ */
+ if (reconfig)
+ rc = bb_g_reconfig();
+ else
+ rc = bb_g_load_state(true);
+ error_code = MAX(error_code, rc); /* not fatal */
+
(void) _sync_nodes_to_jobs(reconfig);
(void) sync_job_files();
_purge_old_node_state(old_node_table_ptr, old_node_record_count);
@@ -1480,11 +1490,6 @@ int read_slurm_conf(int recover, bool reconfig)
}
rc = _preserve_select_type_param(&slurm_conf, old_select_type_p);
error_code = MAX(error_code, rc); /* not fatal */
- if (reconfig)
- rc = bb_g_reconfig();
- else
- rc = bb_g_load_state(true);
- error_code = MAX(error_code, rc); /* not fatal */
/*
* Restore job accounting info if file missing or corrupted,
diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h
index 78dfaac4..ff0d61ac 100644
--- a/src/slurmctld/slurmctld.h
+++ b/src/slurmctld/slurmctld.h
@@ -2022,11 +2022,13 @@ extern void pack_all_part(char **buffer_ptr, int *buffer_size,
* IN/OUT buffer - buffer in which data is placed, pointers automatically
* updated
* IN uid - user requesting the data
+ * IN has_qos_lock - true if assoc_lock .qos=READ_LOCK already acquired
* NOTE: change _unpack_job_desc_msg() in common/slurm_protocol_pack.c
* whenever the data format changes
*/
extern void pack_job(job_record_t *dump_job_ptr, uint16_t show_flags,
- Buf buffer, uint16_t protocol_version, uid_t uid);
+ Buf buffer, uint16_t protocol_version, uid_t uid,
+ bool has_qos_lock);
/*
* pack_part - dump all configuration information about a specific partition
@@ -2079,6 +2081,10 @@ extern void pack_one_node (char **buffer_ptr, int *buffer_size,
/* part_is_visible - should user be able to see this partition */
extern bool part_is_visible(part_record_t *part_ptr, uid_t uid);
+/* part_is_visible_user_rec - should user be able to see this partition */
+extern bool part_is_visible_user_rec(part_record_t *part_ptr,
+ slurmdb_user_rec_t *user);
+
/* part_fini - free all memory associated with partition records */
extern void part_fini (void);
@@ -2607,6 +2613,14 @@ extern bool validate_super_user(uid_t uid);
*/
extern bool validate_operator(uid_t uid);
+/*
+ * validate_operator_user_rec - validate that the user is authorized at the
+ * root, SlurmUser, or SLURMDB_ADMIN_OPERATOR level
+ * IN user - slurmdb_user_rec_t of user to check
+ * RET true if permitted to run, false otherwise
+ */
+extern bool validate_operator_user_rec(slurmdb_user_rec_t *user);
+
/* cleanup_completing()
*
* Clean up the JOB_COMPLETING flag and eventually
diff --git a/src/slurmctld/step_mgr.c b/src/slurmctld/step_mgr.c
index e021c1b7..3855962f 100644
--- a/src/slurmctld/step_mgr.c
+++ b/src/slurmctld/step_mgr.c
@@ -1126,12 +1126,15 @@ static bitstr_t *_pick_step_nodes(job_record_t *job_ptr,
cpus_used[node_inx];
job_blocked_cpus += job_resrcs_ptr->
cpus_used[node_inx];
+ if (!total_cpus)
+ job_blocked_nodes++;
}
}
if (!total_cpus) {
log_flag(STEPS, "%s: %pJ Skipping node. Not enough CPUs to run step here.",
__func__, job_ptr);
+ bit_clear(nodes_avail, i);
continue;
}
@@ -1432,6 +1435,12 @@ static bitstr_t *_pick_step_nodes(job_record_t *job_ptr,
usable_cpu_cnt[i] =
job_resrcs_ptr->cpus[node_inx];
+ log_flag(STEPS, "%s: %pJ Currently running steps use %d of allocated %d CPUs on node %s",
+ __func__, job_ptr,
+ job_resrcs_ptr->cpus_used[node_inx],
+ usable_cpu_cnt[i],
+ node_record_table_ptr[i].name);
+
if (step_spec->flags & SSF_EXCLUSIVE) {
/*
* If whole is given and
@@ -1453,8 +1462,15 @@ static bitstr_t *_pick_step_nodes(job_record_t *job_ptr,
usable_cpu_cnt[i] -=
job_resrcs_ptr->
cpus_used[node_inx];
+ if (!usable_cpu_cnt[i])
+ job_blocked_nodes++;
}
}
+ if (!usable_cpu_cnt[i]) {
+ log_flag(STEPS, "%s: %pJ Skipping node. Not enough CPUs to run step here.",
+ __func__, job_ptr);
+ bit_clear(nodes_avail, i);
+ }
}
}
@@ -1723,8 +1739,9 @@ static void _pick_step_cores(step_record_t *step_ptr,
use_all_cores = false;
if (step_ptr->cpus_per_task > 0) {
- cpu_cnt *= step_ptr->cpus_per_task + cpus_per_core - 1;
- cpu_cnt /= cpus_per_core;
+ cpu_cnt *= step_ptr->cpus_per_task;
+ cpu_cnt += (cpus_per_core - 1);
+ cpu_cnt /= cpus_per_core;
}
}
@@ -1992,6 +2009,12 @@ static void _step_dealloc_lps(step_record_t *step_ptr)
int job_node_inx = -1, step_node_inx = -1;
xassert(job_resrcs_ptr);
+ if (!job_resrcs_ptr) {
+ error("%s: job_resrcs is NULL for %pS; this should never happen",
+ __func__, step_ptr);
+ return;
+ }
+
xassert(job_resrcs_ptr->cpus);
xassert(job_resrcs_ptr->cpus_used);
@@ -3425,6 +3448,9 @@ extern int step_partial_comp(step_complete_msg_t *req, uid_t uid, bool finish,
job_record_t *job_ptr;
step_record_t *step_ptr;
int nodes, rem_nodes;
+#ifndef HAVE_FRONT_END
+ int range_bits, set_bits;
+#endif
xassert(rem);
@@ -3469,9 +3495,6 @@ extern int step_partial_comp(step_complete_msg_t *req, uid_t uid, bool finish,
return EINVAL;
}
- ext_sensors_g_get_stependdata(step_ptr);
- jobacctinfo_aggregate(step_ptr->jobacct, req->jobacct);
-
/* we have been adding task average frequencies for
* jobacct->act_cpufreq so we need to divide with the
* total number of tasks/cpus for the step average frequency */
@@ -3502,10 +3525,45 @@ extern int step_partial_comp(step_complete_msg_t *req, uid_t uid, bool finish,
bit_set_all(step_ptr->exit_node_bitmap);
rem_nodes = 0;
#else
+ range_bits = req->range_last + 1 - req->range_first;
+ set_bits = bit_set_count_range(step_ptr->exit_node_bitmap,
+ req->range_first,
+ req->range_last + 1);
+
+ /* Check if any stepd of the range was already received */
+ if (set_bits) {
+ /* If all are already received skip jobacctinfo_aggregate */
+ if (set_bits == range_bits) {
+ debug("Step complete from %d to %d was already processed. Probably a RPC was resent from a child.",
+ req->range_first, req->range_last);
+ goto no_aggregate;
+ }
+
+ /*
+ * If partially received, we cannot recover the right gathered
+ * information. If we don't gather the new one we'll miss some
+ * information, and if we gather it some of the info will be
+	 * duplicated. We log that error and choose to partially
+ * duplicate because it's probably a smaller error.
+ */
+ error("Step complete from %d to %d was already processed (%d of %d). Probably a RPC was resent from a child and gathered information is partially duplicated.",
+ req->range_first, req->range_last,
+ set_bits, range_bits);
+ }
+
bit_nset(step_ptr->exit_node_bitmap,
req->range_first, req->range_last);
+
+#endif
+
+ ext_sensors_g_get_stependdata(step_ptr);
+ jobacctinfo_aggregate(step_ptr->jobacct, req->jobacct);
+
+#ifndef HAVE_FRONT_END
+no_aggregate:
rem_nodes = bit_clear_count(step_ptr->exit_node_bitmap);
#endif
+
*rem = rem_nodes;
if (rem_nodes == 0) {
/* release all switch windows */
diff --git a/src/slurmd/slurmd/req.c b/src/slurmd/slurmd/req.c
index 82563adc..94e23ed1 100644
--- a/src/slurmd/slurmd/req.c
+++ b/src/slurmd/slurmd/req.c
@@ -948,6 +948,7 @@ static int _check_job_credential(launch_tasks_request_msg_t *req,
uint32_t stepid = req->step_id.step_id;
int tasks_to_launch = req->tasks_to_launch[node_id];
uint32_t job_cpus = 0, step_cpus = 0;
+ uint32_t job_cpus_for_mem = 0, step_cpus_for_mem = 0;
if (req->flags & LAUNCH_NO_ALLOC) {
if (user_ok) {
@@ -1142,7 +1143,14 @@ static int _check_job_credential(launch_tasks_request_msg_t *req,
if (i_last_bit <= i_first_bit)
error("step credential has no CPUs selected");
else {
+ uint16_t scale_for_mem;
i = conf->cpus / (i_last_bit - i_first_bit);
+ if (req->threads_per_core &&
+ (req->threads_per_core != NO_VAL16) &&
+ (req->threads_per_core < conf->threads))
+ scale_for_mem = req->threads_per_core;
+ else
+ scale_for_mem = i;
if (i > 1) {
if (cpu_log)
info("Scaling CPU count by factor of "
@@ -1150,7 +1158,9 @@ static int _check_job_credential(launch_tasks_request_msg_t *req,
i, conf->cpus,
i_last_bit, i_first_bit);
step_cpus *= i;
+ step_cpus_for_mem *= scale_for_mem;
job_cpus *= i;
+ job_cpus_for_mem *= scale_for_mem;
}
}
if (tasks_to_launch > step_cpus) {
@@ -1173,20 +1183,20 @@ static int _check_job_credential(launch_tasks_request_msg_t *req,
if (arg.step_mem_limit & MEM_PER_CPU) {
req->step_mem_lim = arg.step_mem_limit &
(~MEM_PER_CPU);
- req->step_mem_lim *= step_cpus;
+ req->step_mem_lim *= step_cpus_for_mem;
} else
req->step_mem_lim = arg.step_mem_limit;
} else {
if (arg.job_mem_limit & MEM_PER_CPU) {
req->step_mem_lim = arg.job_mem_limit &
(~MEM_PER_CPU);
- req->step_mem_lim *= job_cpus;
+ req->step_mem_lim *= job_cpus_for_mem;
} else
req->step_mem_lim = arg.job_mem_limit;
}
if (arg.job_mem_limit & MEM_PER_CPU) {
req->job_mem_lim = arg.job_mem_limit & (~MEM_PER_CPU);
- req->job_mem_lim *= job_cpus;
+ req->job_mem_lim *= job_cpus_for_mem;
} else
req->job_mem_lim = arg.job_mem_limit;
req->job_core_spec = arg.job_core_spec;
diff --git a/src/slurmd/slurmstepd/mgr.c b/src/slurmd/slurmstepd/mgr.c
index e4235fad..a6f41c1f 100644
--- a/src/slurmd/slurmstepd/mgr.c
+++ b/src/slurmd/slurmstepd/mgr.c
@@ -785,7 +785,14 @@ _one_step_complete_msg(stepd_step_rec_t *job, int first, int last)
if ((retcode == 0) && (rc == 0))
goto finished;
}
- /* on error AGAIN, send to the slurmctld instead */
+ /*
+ * On error AGAIN, send to the slurmctld instead.
+ * This is useful if parent_rank gave up waiting for us
+ * on stepd_wait_for_children_slurmstepd.
+	 * If it's just busy handling our prev messages we'll need
+ * to handle duplicated messages in both the parent and
+ * slurmctld.
+ */
debug3("Rank %d sending complete to slurmctld instead, range "
"%d to %d", step_complete.rank, first, last);
} else {
diff --git a/src/slurmd/slurmstepd/req.c b/src/slurmd/slurmstepd/req.c
index 2267171c..f4d83c69 100644
--- a/src/slurmd/slurmstepd/req.c
+++ b/src/slurmd/slurmstepd/req.c
@@ -1669,17 +1669,34 @@ _handle_completion(int fd, stepd_step_rec_t *job, uid_t uid)
* without the hostlist from the credential.
*/
if (step_complete.bits && (step_complete.rank >= 0)) {
+ int32_t set_bits;
+ int32_t first_bit = first - (step_complete.rank + 1);
+ int32_t last_bit = last - (step_complete.rank + 1);
+ /* bit_set_count_range is [first, end) so +1 last_bit */
+ int32_t last_bit_range = last_bit + 1;
+
#if 0
char bits_string[128];
debug2("Setting range %d (bit %d) through %d(bit %d)",
- first, first-(step_complete.rank+1),
- last, last-(step_complete.rank+1));
+ first, first_bit,
+ last, last_bit);
bit_fmt(bits_string, sizeof(bits_string), step_complete.bits);
debug2(" before bits: %s", bits_string);
#endif
- bit_nset(step_complete.bits,
- first - (step_complete.rank+1),
- last - (step_complete.rank+1));
+ if (!(set_bits = bit_set_count_range(step_complete.bits,
+ first_bit,
+ last_bit_range))) {
+ bit_nset(step_complete.bits, first_bit, last_bit);
+ } else if (set_bits == (last_bit_range - first_bit)) {
+ debug("Step complete from %d to %d was already processed on rank %d. Probably a RPC was resent from a child.",
+ first, last, step_complete.rank);
+ goto timeout;
+ } else {
+ error("Step complete from %d to %d was half-way processed on rank %d. This should never happen.",
+ first, last, step_complete.rank);
+ goto timeout;
+ }
+
#if 0
bit_fmt(bits_string, sizeof(bits_string), step_complete.bits);
debug2(" after bits: %s", bits_string);
diff --git a/src/slurmd/slurmstepd/slurmstepd_job.c b/src/slurmd/slurmstepd/slurmstepd_job.c
index 1bbaa651..8a453c56 100644
--- a/src/slurmd/slurmstepd/slurmstepd_job.c
+++ b/src/slurmd/slurmstepd/slurmstepd_job.c
@@ -47,6 +47,7 @@
#include <sys/types.h>
#include "src/common/eio.h"
+#include "src/common/env.h"
#include "src/common/fd.h"
#include "src/common/gres.h"
#include "src/common/group_cache.h"
@@ -279,6 +280,7 @@ extern stepd_step_rec_t *stepd_step_rec_create(launch_tasks_request_msg_t *msg,
slurm_addr_t resp_addr;
slurm_addr_t io_addr;
int i, nodeid = NO_VAL;
+ uint16_t cpus = conf->cpus;
xassert(msg != NULL);
xassert(msg->complete_nodelist != NULL);
@@ -472,7 +474,16 @@ extern stepd_step_rec_t *stepd_step_rec_create(launch_tasks_request_msg_t *msg,
job->switch_job = msg->switch_job;
job->open_mode = msg->open_mode;
job->options = msg->options;
- format_core_allocs(msg->cred, conf->node_name, conf->cpus,
+
+ /*
+ * FIXME: This is band-aid for --threads-per-core < ThreadsPerCore
+ * used with --mem-per-cpu.
+ */
+ if (msg->threads_per_core && (msg->threads_per_core != NO_VAL16) &&
+ (msg->threads_per_core < conf->threads))
+ cpus = msg->threads_per_core * conf->cores;
+
+ format_core_allocs(msg->cred, conf->node_name, cpus,
&job->job_alloc_cores, &job->step_alloc_cores,
&job->job_mem, &job->step_mem);
@@ -509,6 +520,8 @@ batch_stepd_step_rec_create(batch_job_launch_msg_t *msg)
stepd_step_rec_t *job;
srun_info_t *srun = NULL;
char *in_name;
+ uint16_t cpus = conf->cpus;
+ char *threads_per_core_str;
xassert(msg != NULL);
@@ -597,7 +610,19 @@ batch_stepd_step_rec_create(batch_job_launch_msg_t *msg)
if (msg->cpus_per_node)
job->cpus = msg->cpus_per_node[0];
- format_core_allocs(msg->cred, conf->node_name, conf->cpus,
+ /*
+ * FIXME: This is band-aid for --threads-per-core < ThreadsPerCore
+ * used with --mem-per-cpu.
+ */
+ threads_per_core_str = getenvp(job->env, "SLURM_THREADS_PER_CORE");
+ if (threads_per_core_str) {
+ uint32_t threads_per_core =
+ strtol(threads_per_core_str, NULL, 10);
+ if (threads_per_core && (threads_per_core < conf->threads))
+ cpus = threads_per_core * conf->cores;
+ }
+
+ format_core_allocs(msg->cred, conf->node_name, cpus,
&job->job_alloc_cores, &job->step_alloc_cores,
&job->job_mem, &job->step_mem);
if (job->step_mem && slurm_conf.job_acct_oom_kill)
diff --git a/src/slurmdbd/slurmdbd.c b/src/slurmdbd/slurmdbd.c
index 824b7d32..922d4485 100644
--- a/src/slurmdbd/slurmdbd.c
+++ b/src/slurmdbd/slurmdbd.c
@@ -156,8 +156,7 @@ int main(int argc, char **argv)
* for the first time after an upgrade.
*/
if (slurm_auth_init(NULL) != SLURM_SUCCESS) {
- fatal("Unable to initialize %s authentication plugin",
- slurm_conf.authtype);
+ fatal("Unable to initialize authentication plugins");
}
if (slurm_acct_storage_init() != SLURM_SUCCESS) {
fatal("Unable to initialize %s accounting storage plugin",
diff --git a/src/squeue/opts.c b/src/squeue/opts.c
index 8e58a52e..96a86d41 100644
--- a/src/squeue/opts.c
+++ b/src/squeue/opts.c
@@ -528,9 +528,23 @@ _parse_state( char* str, uint32_t* states )
xstrcat(state_names, ",");
xstrcat(state_names, job_state_string(JOB_RESIZING));
xstrcat(state_names, ",");
+ xstrcat(state_names, job_state_string(JOB_RESV_DEL_HOLD));
+ xstrcat(state_names, ",");
+ xstrcat(state_names, job_state_string(JOB_REQUEUE));
+ xstrcat(state_names, ",");
+ xstrcat(state_names, job_state_string(JOB_REQUEUE_FED));
+ xstrcat(state_names, ",");
+ xstrcat(state_names, job_state_string(JOB_REQUEUE_HOLD));
+ xstrcat(state_names, ",");
xstrcat(state_names, job_state_string(JOB_REVOKED));
xstrcat(state_names, ",");
+ xstrcat(state_names, job_state_string(JOB_SIGNALING));
+ xstrcat(state_names, ",");
xstrcat(state_names, job_state_string(JOB_SPECIAL_EXIT));
+ xstrcat(state_names, ",");
+ xstrcat(state_names, job_state_string(JOB_STAGE_OUT));
+ xstrcat(state_names, ",");
+ xstrcat(state_names, job_state_string(JOB_STOPPED));
error("Valid job states include: %s\n", state_names);
xfree (state_names);
return SLURM_ERROR;
diff --git a/src/srun/libsrun/opt.c b/src/srun/libsrun/opt.c
index 62605e35..895e2913 100644
--- a/src/srun/libsrun/opt.c
+++ b/src/srun/libsrun/opt.c
@@ -1431,7 +1431,7 @@ static void _usage(void)
" [--prolog=fname] [--epilog=fname]\n"
" [--task-prolog=fname] [--task-epilog=fname]\n"
" [--ctrl-comm-ifhn=addr] [--multi-prog] [--mcs-label=mcs]\n"
-" [--cpu-freq=min[-max[:gov]] [--power=flags] [--spread-job]\n"
+" [--cpu-freq=min[-max[:gov]]] [--power=flags] [--spread-job]\n"
" [--switches=max-switches{@max-time-to-wait}] [--reboot]\n"
" [--core-spec=cores] [--thread-spec=threads]\n"
" [--bb=burst_buffer_spec] [--bbf=burst_buffer_file]\n"
@@ -1440,7 +1440,7 @@ static void _usage(void)
" [-w hosts...] [-x hosts...] [--use-min-nodes]\n"
" [--mpi-combine=yes|no] [--het-group=value]\n"
" [--cpus-per-gpu=n] [--gpus=n] [--gpu-bind=...] [--gpu-freq=...]\n"
-" [--gpus-per-node=n] [--gpus-per-socket=n] [--gpus-per-task=n]\n"
+" [--gpus-per-node=n] [--gpus-per-socket=n] [--gpus-per-task=n]\n"
" [--mem-per-gpu=MB]\n"
" executable [args...]\n");
diff --git a/contribs/pmi/Makefile.in b/contribs/pmi/Makefile.in
index 571d7edb..adc602fd 100644
--- a/contribs/pmi/Makefile.in
+++ b/contribs/pmi/Makefile.in
@@ -1,4 +1,4 @@
-# Makefile.in generated by automake 1.16.2 from Makefile.am.
+# Makefile.in generated by automake 1.16.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994-2020 Free Software Foundation, Inc.
@@ -712,7 +712,8 @@ installdirs:
done
install: $(BUILT_SOURCES)
$(MAKE) $(AM_MAKEFLAGS) install-am
-install-exec: install-exec-am
+install-exec: $(BUILT_SOURCES)
+ $(MAKE) $(AM_MAKEFLAGS) install-exec-am
install-data: install-data-am
uninstall: uninstall-am
@@ -815,7 +816,7 @@ ps-am:
uninstall-am: uninstall-libLTLIBRARIES
-.MAKE: all check install install-am install-strip
+.MAKE: all check install install-am install-exec install-strip
.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \
clean-generic clean-libLTLIBRARIES clean-libtool cscopelist-am \
diff --git a/src/api/Makefile.in b/src/api/Makefile.in
index ffacb7e9..faf0aec1 100644
--- a/src/api/Makefile.in
+++ b/src/api/Makefile.in
@@ -1,4 +1,4 @@
-# Makefile.in generated by automake 1.16.2 from Makefile.am.
+# Makefile.in generated by automake 1.16.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994-2020 Free Software Foundation, Inc.
@@ -1049,7 +1049,8 @@ installdirs:
done
install: $(BUILT_SOURCES)
$(MAKE) $(AM_MAKEFLAGS) install-am
-install-exec: install-exec-am
+install-exec: $(BUILT_SOURCES)
+ $(MAKE) $(AM_MAKEFLAGS) install-exec-am
install-data: install-data-am
uninstall: uninstall-am
@@ -1253,7 +1254,7 @@ ps-am:
uninstall-am: uninstall-libLTLIBRARIES uninstall-pkglibLTLIBRARIES
-.MAKE: all check install install-am install-strip
+.MAKE: all check install install-am install-exec install-strip
.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \
clean-generic clean-libLTLIBRARIES clean-libtool \
diff --git a/testsuite/Makefile.in b/testsuite/Makefile.in
index 4da68bc8..41518925 100644
--- a/testsuite/Makefile.in
+++ b/testsuite/Makefile.in
@@ -1,4 +1,4 @@
-# Makefile.in generated by automake 1.16.2 from Makefile.am.
+# Makefile.in generated by automake 1.16.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994-2020 Free Software Foundation, Inc.
@@ -620,7 +620,7 @@ site.exp: Makefile $(EXTRA_DEJAGNU_SITE_CONFIG)
@echo '# Do not edit here. If you wish to override these values' >>site.tmp
@echo '# edit the last section' >>site.tmp
@echo 'set srcdir "$(srcdir)"' >>site.tmp
- @echo "set objdir `pwd`" >>site.tmp
+ @echo "set objdir \"`pwd`\"" >>site.tmp
@echo 'set build_alias "$(build_alias)"' >>site.tmp
@echo 'set build_triplet $(build_triplet)' >>site.tmp
@echo 'set host_alias "$(host_alias)"' >>site.tmp
diff --git a/testsuite/slurm_unit/api/Makefile.in b/testsuite/slurm_unit/api/Makefile.in
index 9e03c574..1e8b5513 100644
--- a/testsuite/slurm_unit/api/Makefile.in
+++ b/testsuite/slurm_unit/api/Makefile.in
@@ -1,4 +1,4 @@
-# Makefile.in generated by automake 1.16.2 from Makefile.am.
+# Makefile.in generated by automake 1.16.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994-2020 Free Software Foundation, Inc.
@@ -414,6 +414,7 @@ am__set_TESTS_bases = \
bases='$(TEST_LOGS)'; \
bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \
bases=`echo $$bases`
+AM_TESTSUITE_SUMMARY_HEADER = ' for $(PACKAGE_STRING)'
RECHECK_LOGS = $(TEST_LOGS)
TEST_SUITE_LOG = test-suite.log
TEST_EXTENSIONS = @EXEEXT@ .test
@@ -991,7 +992,7 @@ $(TEST_SUITE_LOG): $(TEST_LOGS)
test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \
fi; \
echo "$${col}$$br$${std}"; \
- echo "$${col}Testsuite summary for $(PACKAGE_STRING)$${std}"; \
+ echo "$${col}Testsuite summary"$(AM_TESTSUITE_SUMMARY_HEADER)"$${std}"; \
echo "$${col}$$br$${std}"; \
create_testsuite_report --maybe-color; \
echo "$$col$$br$$std"; \
diff --git a/testsuite/slurm_unit/common/Makefile.in b/testsuite/slurm_unit/common/Makefile.in
index 602485ae..086d57f5 100644
--- a/testsuite/slurm_unit/common/Makefile.in
+++ b/testsuite/slurm_unit/common/Makefile.in
@@ -1,4 +1,4 @@
-# Makefile.in generated by automake 1.16.2 from Makefile.am.
+# Makefile.in generated by automake 1.16.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994-2020 Free Software Foundation, Inc.
@@ -487,6 +487,7 @@ am__set_TESTS_bases = \
bases='$(TEST_LOGS)'; \
bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \
bases=`echo $$bases`
+AM_TESTSUITE_SUMMARY_HEADER = ' for $(PACKAGE_STRING)'
RECHECK_LOGS = $(TEST_LOGS)
TEST_SUITE_LOG = test-suite.log
TEST_EXTENSIONS = @EXEEXT@ .test
@@ -1200,7 +1201,7 @@ $(TEST_SUITE_LOG): $(TEST_LOGS)
test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \
fi; \
echo "$${col}$$br$${std}"; \
- echo "$${col}Testsuite summary for $(PACKAGE_STRING)$${std}"; \
+ echo "$${col}Testsuite summary"$(AM_TESTSUITE_SUMMARY_HEADER)"$${std}"; \
echo "$${col}$$br$${std}"; \
create_testsuite_report --maybe-color; \
echo "$$col$$br$$std"; \
diff --git a/testsuite/slurm_unit/common/bitstring/Makefile.in b/testsuite/slurm_unit/common/bitstring/Makefile.in
index 567423d2..2c5048ea 100644
--- a/testsuite/slurm_unit/common/bitstring/Makefile.in
+++ b/testsuite/slurm_unit/common/bitstring/Makefile.in
@@ -1,4 +1,4 @@
-# Makefile.in generated by automake 1.16.2 from Makefile.am.
+# Makefile.in generated by automake 1.16.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994-2020 Free Software Foundation, Inc.
@@ -415,6 +415,7 @@ am__set_TESTS_bases = \
bases='$(TEST_LOGS)'; \
bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \
bases=`echo $$bases`
+AM_TESTSUITE_SUMMARY_HEADER = ' for $(PACKAGE_STRING)'
RECHECK_LOGS = $(TEST_LOGS)
AM_RECURSIVE_TARGETS = check recheck
TEST_SUITE_LOG = test-suite.log
@@ -964,7 +965,7 @@ $(TEST_SUITE_LOG): $(TEST_LOGS)
test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \
fi; \
echo "$${col}$$br$${std}"; \
- echo "$${col}Testsuite summary for $(PACKAGE_STRING)$${std}"; \
+ echo "$${col}Testsuite summary"$(AM_TESTSUITE_SUMMARY_HEADER)"$${std}"; \
echo "$${col}$$br$${std}"; \
create_testsuite_report --maybe-color; \
echo "$$col$$br$$std"; \
diff --git a/testsuite/slurm_unit/common/slurm_protocol_pack/Makefile.in b/testsuite/slurm_unit/common/slurm_protocol_pack/Makefile.in
index 12908e4e..7ed04991 100644
--- a/testsuite/slurm_unit/common/slurm_protocol_pack/Makefile.in
+++ b/testsuite/slurm_unit/common/slurm_protocol_pack/Makefile.in
@@ -1,4 +1,4 @@
-# Makefile.in generated by automake 1.16.2 from Makefile.am.
+# Makefile.in generated by automake 1.16.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994-2020 Free Software Foundation, Inc.
@@ -419,6 +419,7 @@ am__set_TESTS_bases = \
bases='$(TEST_LOGS)'; \
bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \
bases=`echo $$bases`
+AM_TESTSUITE_SUMMARY_HEADER = ' for $(PACKAGE_STRING)'
RECHECK_LOGS = $(TEST_LOGS)
AM_RECURSIVE_TARGETS = check recheck
TEST_SUITE_LOG = test-suite.log
@@ -984,7 +985,7 @@ $(TEST_SUITE_LOG): $(TEST_LOGS)
test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \
fi; \
echo "$${col}$$br$${std}"; \
- echo "$${col}Testsuite summary for $(PACKAGE_STRING)$${std}"; \
+ echo "$${col}Testsuite summary"$(AM_TESTSUITE_SUMMARY_HEADER)"$${std}"; \
echo "$${col}$$br$${std}"; \
create_testsuite_report --maybe-color; \
echo "$$col$$br$$std"; \
diff --git a/testsuite/slurm_unit/common/slurmdb_pack/Makefile.in b/testsuite/slurm_unit/common/slurmdb_pack/Makefile.in
index ad58c519..b7fae637 100644
--- a/testsuite/slurm_unit/common/slurmdb_pack/Makefile.in
+++ b/testsuite/slurm_unit/common/slurmdb_pack/Makefile.in
@@ -1,4 +1,4 @@
-# Makefile.in generated by automake 1.16.2 from Makefile.am.
+# Makefile.in generated by automake 1.16.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994-2020 Free Software Foundation, Inc.
@@ -571,6 +571,7 @@ am__set_TESTS_bases = \
bases='$(TEST_LOGS)'; \
bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \
bases=`echo $$bases`
+AM_TESTSUITE_SUMMARY_HEADER = ' for $(PACKAGE_STRING)'
RECHECK_LOGS = $(TEST_LOGS)
AM_RECURSIVE_TARGETS = check recheck
TEST_SUITE_LOG = test-suite.log
@@ -1388,7 +1389,7 @@ $(TEST_SUITE_LOG): $(TEST_LOGS)
test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \
fi; \
echo "$${col}$$br$${std}"; \
- echo "$${col}Testsuite summary for $(PACKAGE_STRING)$${std}"; \
+ echo "$${col}Testsuite summary"$(AM_TESTSUITE_SUMMARY_HEADER)"$${std}"; \
echo "$${col}$$br$${std}"; \
create_testsuite_report --maybe-color; \
echo "$$col$$br$$std"; \
diff --git a/testsuite/slurm_unit/slurmd/common/Makefile.in b/testsuite/slurm_unit/slurmd/common/Makefile.in
index 20091e19..0a8d99ae 100644
--- a/testsuite/slurm_unit/slurmd/common/Makefile.in
+++ b/testsuite/slurm_unit/slurmd/common/Makefile.in
@@ -1,4 +1,4 @@
-# Makefile.in generated by automake 1.16.2 from Makefile.am.
+# Makefile.in generated by automake 1.16.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994-2020 Free Software Foundation, Inc.
@@ -408,6 +408,7 @@ am__set_TESTS_bases = \
bases='$(TEST_LOGS)'; \
bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \
bases=`echo $$bases`
+AM_TESTSUITE_SUMMARY_HEADER = ' for $(PACKAGE_STRING)'
RECHECK_LOGS = $(TEST_LOGS)
AM_RECURSIVE_TARGETS = check recheck
TEST_SUITE_LOG = test-suite.log
@@ -956,7 +957,7 @@ $(TEST_SUITE_LOG): $(TEST_LOGS)
test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \
fi; \
echo "$${col}$$br$${std}"; \
- echo "$${col}Testsuite summary for $(PACKAGE_STRING)$${std}"; \
+ echo "$${col}Testsuite summary"$(AM_TESTSUITE_SUMMARY_HEADER)"$${std}"; \
echo "$${col}$$br$${std}"; \
create_testsuite_report --maybe-color; \
echo "$$col$$br$$std"; \
diff --git a/debian/changelog b/debian/changelog
index 9af06244..855c7e09 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,11 @@
+slurm-wlm (20.11.7-1) unstable; urgency=medium
+
+ * New upstream release fixes CVE-2021-31215 (Closes: #988439)
+ * Add typo patch for acct_gather.conf man page
+ * Update miscellanea manpage patch
+
+ -- Gennaro Oliva <oliva.g@na.icar.cnr.it> Tue, 15 Jun 2021 17:49:41 +0200
+
slurm-wlm (20.11.5-1) unstable; urgency=medium
* New upstream release
diff --git a/debian/patches/fix-typos-acct_gather.conf.5 b/debian/patches/fix-typos-acct_gather.conf.5
new file mode 100644
index 00000000..1d595c9c
--- /dev/null
+++ b/debian/patches/fix-typos-acct_gather.conf.5
@@ -0,0 +1,25 @@
+Description: Fix two typos in acct_gather.conf.5 man page
+Author: Gennaro Oliva <oliva.g@na.icar.cnr.it>
+Forwarded: https://bugs.schedmd.com/show_bug.cgi?id=11627
+Last-Update: 2021-05-16
+
+--- slurm-wlm-20.11.7.orig/doc/man/man5/acct_gather.conf.5
++++ slurm-wlm-20.11.7/doc/man/man5/acct_gather.conf.5
+@@ -12,7 +12,7 @@ DEFAULT_SLURM_CONF parameter or at execu
+ environment variable. The file will always be located in the
+ same directory as the \fBslurm.conf\fP file.
+ .LP
+-Parameter names are case insensitive but parameter values are case sensistive.
++Parameter names are case insensitive but parameter values are case sensitive.
+ Any text following a "#" in the configuration file is treated
+ as a comment through the end of that line.
+ The size of each line in the file is limited to 1024 characters.
+@@ -159,7 +159,7 @@ The InfluxDB plugin provides the same in
+ instead send information to the configured InfluxDB server.
+ .P
+ The InfluxDB plugin is designed against 1.x protocol of InfluxDB. Any site
+-running a v2.x InfluxDB server will need to configure a v1.x compatiblity
++running a v2.x InfluxDB server will need to configure a v1.x compatibility
+ endpoint along with the correct user and password authorization. Token
+ authentication is not currently supported.
+ .SS
diff --git a/debian/patches/miscellanea-manpages b/debian/patches/miscellanea-manpages
index 1db11d83..341971fe 100644
--- a/debian/patches/miscellanea-manpages
+++ b/debian/patches/miscellanea-manpages
@@ -3,11 +3,13 @@ Description: Move general info man pages to the miscellanea section (7)
to the miscellanea section since they are not related to commands
Author: Gennaro Oliva <oliva.g@na.icar.cnr.it>
Forwarded: https://bugs.schedmd.com/show_bug.cgi?id=8719
-Last-Update: 2020-11-05
+Last-Update: 2021-05-17
+diff --git a/configure.ac b/configure.ac
+index d55e537f..61067ba9 100644
--- a/configure.ac
+++ b/configure.ac
-@@ -416,6 +416,7 @@
+@@ -416,6 +416,7 @@ AC_CONFIG_FILES([Makefile
doc/man/man1/Makefile
doc/man/man3/Makefile
doc/man/man5/Makefile
@@ -15,6 +17,8 @@ Last-Update: 2020-11-05
doc/man/man8/Makefile
doc/html/Makefile
doc/html/configurator.html
+diff --git a/doc/man/Makefile.am b/doc/man/Makefile.am
+index 5a3ddf7f..e3afc1da 100644
--- a/doc/man/Makefile.am
+++ b/doc/man/Makefile.am
@@ -1,3 +1,3 @@
@@ -22,9 +26,11 @@ Last-Update: 2020-11-05
-SUBDIRS = man1 man3 man5 man8
+SUBDIRS = man1 man3 man5 man7 man8
+diff --git a/doc/man/man1/Makefile.am b/doc/man/man1/Makefile.am
+index ce6cd5d8..68870920 100644
--- a/doc/man/man1/Makefile.am
+++ b/doc/man/man1/Makefile.am
-@@ -12,7 +12,6 @@
+@@ -12,7 +12,6 @@ man1_MANS = \
scrontab.1 \
sdiag.1 \
sinfo.1 \
@@ -32,37 +38,23 @@ Last-Update: 2020-11-05
sprio.1 \
squeue.1 \
sreport.1 \
---- a/doc/man/man5/slurm.conf.5
-+++ b/doc/man/man5/slurm.conf.5
-@@ -1689,7 +1689,7 @@
- be called before and/or after execution of each task spawned as
- part of a user's job step. Default location is "plugstack.conf"
- in the same directory as the system slurm.conf. For more information
--on SPANK plugins, see the \fBspank\fR(8) manual.
-+on SPANK plugins, see the \fBspank\fR(7) manual.
-
- .TP
- \fBPowerParameters\fR
-@@ -5982,4 +5982,4 @@
- \fBgetrlimit\fR(2), \fBgres.conf\fR(5), \fBgroup\fR(5), \fBhostname\fR(1),
- \fBscontrol\fR(1), \fBslurmctld\fR(8), \fBslurmd\fR(8),
- \fBslurmdbd\fR(8), \fBslurmdbd.conf\fR(5), \fBsrun\fR(1),
--\fBspank\fR(8), \fBsyslog\fR(3), \fBtopology.conf\fR(5)
-+\fBspank\fR(7), \fBsyslog\fR(3), \fBtopology.conf\fR(5)
+diff --git a/doc/man/man7/Makefile.am b/doc/man/man7/Makefile.am
+new file mode 100644
+index 00000000..dfdbe959
--- /dev/null
+++ b/doc/man/man7/Makefile.am
-@@ -0,0 +1,22 @@
+@@ -0,0 +1,23 @@
+htmldir = ${datadir}/doc/${PACKAGE}-${SLURM_VERSION_STRING}/html
+
+man7_MANS = slurm.7 \
-+ spank.7
++ spank.7
+
+EXTRA_DIST = $(man7_MANS)
+
+if HAVE_MAN2HTML
+
+html_DATA = \
-+ spank.html
++ spank.html
+
+MOSTLYCLEANFILES = ${html_DATA}
+
@@ -74,1477 +66,64 @@ Last-Update: 2020-11-05
+ `dirname $<`/../man2html.py @SLURM_MAJOR@.@SLURM_MINOR@ $(srcdir)/../../html/header.txt $(srcdir)/../../html/footer.txt $<
+
+endif
++
+diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5
+index 1978f6e6..76498e50 100644
+--- a/doc/man/man5/slurm.conf.5
++++ b/doc/man/man5/slurm.conf.5
+@@ -1696,7 +1696,7 @@ This provides support for a highly configurable set of plugins to
+ be called before and/or after execution of each task spawned as
+ part of a user's job step. Default location is "plugstack.conf"
+ in the same directory as the system slurm.conf. For more information
+-on SPANK plugins, see the \fBspank\fR(8) manual.
++on SPANK plugins, see the \fBspank\fR(7) manual.
+
+ .TP
+ \fBPowerParameters\fR
+@@ -6009,4 +6009,4 @@ details.
+ \fBgetrlimit\fR(2), \fBgres.conf\fR(5), \fBgroup\fR(5), \fBhostname\fR(1),
+ \fBscontrol\fR(1), \fBslurmctld\fR(8), \fBslurmd\fR(8),
+ \fBslurmdbd\fR(8), \fBslurmdbd.conf\fR(5), \fBsrun\fR(1),
+-\fBspank\fR(8), \fBsyslog\fR(3), \fBtopology.conf\fR(5)
++\fBspank\fR(7), \fBsyslog\fR(3), \fBtopology.conf\fR(5)
+diff --git a/doc/man/man1/slurm.1 b/doc/man/man7/slurm.7
+similarity index 97%
+rename from doc/man/man1/slurm.1
+rename to doc/man/man7/slurm.7
+index ecb5617f..a043fb73 100644
--- a/doc/man/man1/slurm.1
-+++ /dev/null
-@@ -1,72 +0,0 @@
--.TH Slurm "1" "Slurm System" "June 2018" "Slurm System"
--
--.SH "NAME"
--Slurm \- Slurm Workload Manager overview.
--
--.SH "DESCRIPTION"
--The Slurm Workload Manager is an open source,
--fault-tolerant, and highly scalable cluster management and job scheduling system
--for large and small Linux clusters. Slurm requires no kernel modifications for
--its operation and is relatively self-contained. As a cluster resource manager,
--Slurm has three key functions. First, it allocates exclusive and/or non-exclusive
--access to resources (compute nodes) to users for some duration of time so they
--can perform work. Second, it provides a framework for starting, executing, and
--monitoring work (normally a parallel job) on the set of allocated nodes.
--Finally, it arbitrates contention for resources by managing a queue of
--pending work.
--Optional plugins can be used for accounting, advanced reservation,
--gang scheduling (time sharing for parallel jobs), backfill scheduling,
--resource limits by user or bank account,
--and sophisticated multifactor job prioritization algorithms.
--
--Slurm has a centralized manager, \fBslurmctld\fR, to monitor resources and
--work. There may also be a backup manager to assume those responsibilities in the
--event of failure. Each compute server (node) has a \fBslurmd\fR daemon, which
--can be compared to a remote shell: it waits for work, executes that work, returns
--status, and waits for more work. An optional \fBslurmdbd\fR (Slurm DataBase Daemon)
--can be used for accounting purposes and to maintain resource limit information.
--
--Basic user tools include \fBsrun\fR to initiate jobs,
--\fBscancel\fR to terminate queued or running jobs, \fBsinfo\fR to report system
--status, and \fBsqueue\fR to report the status of jobs. There is also an administrative
--tool \fBscontrol\fR available to monitor and/or modify configuration and state
--information. APIs are available for all functions.
--
--Slurm configuration is maintained in the \fBslurm.conf\fR file.
--
--Man pages are available for all Slurm commands, daemons, APIs, plus the
--\fBslurm.conf\fR file.
--Extensive documentation is also available on the internet at
--\fB<https://slurm.schedmd.com/>\fR.
--
--.SH "COPYING"
--Copyright (C) 2005\-2007 The Regents of the University of California.
--Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
--.br
--Copyright (C) 2008\-2009 Lawrence Livermore National Security.
--.br
--Copyright (C) 2010\-2013 SchedMD LLC.
--.LP
--This file is part of Slurm, a resource management program.
--For details, see <https://slurm.schedmd.com/>.
--.LP
--Slurm is free software; you can redistribute it and/or modify it under
--the terms of the GNU General Public License as published by the Free
--Software Foundation; either version 2 of the License, or (at your option)
--any later version.
--.LP
--Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
--WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
--FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
--details.
--
--.SH "SEE ALSO"
--\fBsacct\fR(1), \fBsacctmgr\fR(1), \fBsalloc\fR(1), \fBsattach\fR(1),
--\fBsbatch\fR(1), \fBsbcast\fR(1), \fBscancel\fR(1), \fBscontrol\fR(1),
--\fBsinfo\fR(1), \fBsqueue\fR(1), \fBsreport\fR(1),
--\fBsrun\fR(1), \fBsshare\fR(1), \fBsstat\fR(1), \fBstrigger\fR(1),
--\fBsview\fR(1),
--\fBslurm.conf\fR(5), \fBslurmdbd.conf\fR(5),
--\fBslurmctld\fR(8), \fBslurmd\fR(8), \fBslurmdbd\fR(8), \fBslurmstepd\fR(8),
--\fBspank\fR(8)
--
---- /dev/null
+++ b/doc/man/man7/slurm.7
-@@ -0,0 +1,72 @@
+@@ -1,4 +1,4 @@
+-.TH Slurm "1" "Slurm System" "June 2018" "Slurm System"
+.TH Slurm "7" "Slurm System" "June 2018" "Slurm System"
-+
-+.SH "NAME"
-+Slurm \- Slurm Workload Manager overview.
-+
-+.SH "DESCRIPTION"
-+The Slurm Workload Manager is an open source,
-+fault-tolerant, and highly scalable cluster management and job scheduling system
-+for large and small Linux clusters. Slurm requires no kernel modifications for
-+its operation and is relatively self-contained. As a cluster resource manager,
-+Slurm has three key functions. First, it allocates exclusive and/or non-exclusive
-+access to resources (compute nodes) to users for some duration of time so they
-+can perform work. Second, it provides a framework for starting, executing, and
-+monitoring work (normally a parallel job) on the set of allocated nodes.
-+Finally, it arbitrates contention for resources by managing a queue of
-+pending work.
-+Optional plugins can be used for accounting, advanced reservation,
-+gang scheduling (time sharing for parallel jobs), backfill scheduling,
-+resource limits by user or bank account,
-+and sophisticated multifactor job prioritization algorithms.
-+
-+Slurm has a centralized manager, \fBslurmctld\fR, to monitor resources and
-+work. There may also be a backup manager to assume those responsibilities in the
-+event of failure. Each compute server (node) has a \fBslurmd\fR daemon, which
-+can be compared to a remote shell: it waits for work, executes that work, returns
-+status, and waits for more work. An optional \fBslurmdbd\fR (Slurm DataBase Daemon)
-+can be used for accounting purposes and to maintain resource limit information.
-+
-+Basic user tools include \fBsrun\fR to initiate jobs,
-+\fBscancel\fR to terminate queued or running jobs, \fBsinfo\fR to report system
-+status, and \fBsqueue\fR to report the status of jobs. There is also an administrative
-+tool \fBscontrol\fR available to monitor and/or modify configuration and state
-+information. APIs are available for all functions.
-+
-+Slurm configuration is maintained in the \fBslurm.conf\fR file.
-+
-+Man pages are available for all Slurm commands, daemons, APIs, plus the
-+\fBslurm.conf\fR file.
-+Extensive documentation is also available on the internet at
-+\fB<https://slurm.schedmd.com/>\fR.
-+
-+.SH "COPYING"
-+Copyright (C) 2005\-2007 The Regents of the University of California.
-+Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
-+.br
-+Copyright (C) 2008\-2009 Lawrence Livermore National Security.
-+.br
-+Copyright (C) 2010\-2013 SchedMD LLC.
-+.LP
-+This file is part of Slurm, a resource management program.
-+For details, see <https://slurm.schedmd.com/>.
-+.LP
-+Slurm is free software; you can redistribute it and/or modify it under
-+the terms of the GNU General Public License as published by the Free
-+Software Foundation; either version 2 of the License, or (at your option)
-+any later version.
-+.LP
-+Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
-+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
-+details.
-+
-+.SH "SEE ALSO"
-+\fBsacct\fR(1), \fBsacctmgr\fR(1), \fBsalloc\fR(1), \fBsattach\fR(1),
-+\fBsbatch\fR(1), \fBsbcast\fR(1), \fBscancel\fR(1), \fBscontrol\fR(1),
-+\fBsinfo\fR(1), \fBsqueue\fR(1), \fBsreport\fR(1),
-+\fBsrun\fR(1), \fBsshare\fR(1), \fBsstat\fR(1), \fBstrigger\fR(1),
-+\fBsview\fR(1),
-+\fBslurm.conf\fR(5), \fBslurmdbd.conf\fR(5),
-+\fBslurmctld\fR(8), \fBslurmd\fR(8), \fBslurmdbd\fR(8), \fBslurmstepd\fR(8),
+
+ .SH "NAME"
+ Slurm \- Slurm Workload Manager overview.
+@@ -68,5 +68,5 @@ details.
+ \fBsview\fR(1),
+ \fBslurm.conf\fR(5), \fBslurmdbd.conf\fR(5),
+ \fBslurmctld\fR(8), \fBslurmd\fR(8), \fBslurmdbd\fR(8), \fBslurmstepd\fR(8),
+-\fBspank\fR(8)
+\fBspank\fR(7)
-+
+
+diff --git a/doc/man/man8/spank.8 b/doc/man/man7/spank.7
+similarity index 99%
+rename from doc/man/man8/spank.8
+rename to doc/man/man7/spank.7
+index 4609dbf9..91d0996d 100644
--- a/doc/man/man8/spank.8
-+++ /dev/null
-@@ -1,656 +0,0 @@
--.TH SPANK "8" "Slurm Component" "April 2020" "Slurm Component"
--
--.SH "NAME"
--\fBSPANK\fR \- Slurm Plug\-in Architecture for Node and job (K)control
--
--.SH "DESCRIPTION"
--This manual briefly describes the capabilities of the Slurm Plug\-in
--architecture for Node and job Kontrol (\fBSPANK\fR) as well as the \fBSPANK\fR
--configuration file: (By default: \fBplugstack.conf\fP.)
--.LP
--\fBSPANK\fR provides a very generic interface for stackable plug\-ins
--which may be used to dynamically modify the job launch code in
--Slurm. \fBSPANK\fR plugins may be built without access to Slurm source
--code. They need only be compiled against Slurm's \fBspank.h\fR header file,
--added to the \fBSPANK\fR config file \fBplugstack.conf\fR,
--and they will be loaded at runtime during the next job launch. Thus,
--the \fBSPANK\fR infrastructure provides administrators and other developers
--a low cost, low effort ability to dynamically modify the runtime
--behavior of Slurm job launch.
--.LP
--\fBNote\fR: \fBSPANK\fR plugins using the Slurm APIs need to be recompiled when
--upgrading Slurm to a new major release.
--.LP
--
--.SH "SPANK PLUGINS"
--\fBSPANK\fR plugins are loaded in up to five separate contexts during a
--\fBSlurm\fR job. Briefly, the five contexts are:
--.TP 8
--\fBlocal\fB
--In \fBlocal\fR context, the plugin is loaded by \fBsrun\fR. (i.e. the "local"
--part of a parallel job).
--.TP
--\fBremote\fR
--In \fBremote\fR context, the plugin is loaded by \fBslurmstepd\fR. (i.e. the "remote"
--part of a parallel job).
--.TP
--\fBallocator\fR
--In \fBallocator\fR context, the plugin is loaded in one of the job allocation
--utilities \fBsbatch\fR or \fBsalloc\fR.
--.LP
--.TP
--\fBslurmd\fR In \fBslurmd\fR context, the plugin is loaded in the
--\fBslurmd\fR daemon itself. \fBNote\fR: Plugins loaded in slurmd context
--persist for the entire time slurmd is running, so if configuration is
--changed or plugins are updated, slurmd must be restarted for the changes
--to take effect.
--.LP
--.TP
--\fBjob_script\fR
--In the \fBjob_script\fR context, plugins are loaded in the context of the
--job prolog or epilog. \fBNote\fR: Plugins are loaded in \fBjob_script\fR
--context on each run on the job prolog or epilog, in a separate address
--space from plugins in \fBslurmd\fR context. This means there is no
--state shared between this context and other contexts, or even between
--one call to \fBslurm_spank_job_prolog\fR or \fBslurm_spank_job_epilog\fR
--and subsequent calls.
--.LP
--In local context, only the \fBinit\fR, \fBexit\fR, \fBinit_post_opt\fR, and
--\fBlocal_user_init\fR functions are called. In allocator context, only the
--\fBinit\fR, \fBexit\fR, and \fBinit_post_opt\fR functions are called.
--Similarly, in slurmd context, only the \fBinit\fR and \fBslurmd_exit\fR
--callbacks are active, and in the job_script context, only the \fBjob_prolog\fR
--and \fBjob_epilog\fR callbacks are used.
--Plugins may query the context in which they are running with the
--\fBspank_context\fR and \fBspank_remote\fR functions defined in
--\fB<slurm/spank.h>\fR.
--.LP
--\fBSPANK\fR plugins may be called from multiple points during the Slurm job
--launch. A plugin may define the following functions:
--.TP 2
--\fBslurm_spank_init\fR
--Called just after plugins are loaded. In remote context, this is just
--after job step is initialized. This function is called before any plugin
--option processing.
--.TP
--\fBslurm_spank_job_prolog\fR
--Called at the same time as the job prolog. If this function returns a
--negative value and the \fBSPANK\fR plugin that contains it is required in the
--\fBplugstack.conf\fR, the node that this is run on will be drained.
--
--.TP
--\fBslurm_spank_init_post_opt\fR
--Called at the same point as \fBslurm_spank_init\fR, but after all
--user options to the plugin have been processed. The reason that the
--\fBinit\fR and \fBinit_post_opt\fR callbacks are separated is so that
--plugins can process system-wide options specified in plugstack.conf in
--the \fBinit\fR callback, then process user options, and finally take some
--action in \fBslurm_spank_init_post_opt\fR if necessary.
--In the case of a heterogeneous job, \fBslurm_spank_init\fR is invoked once
--per job component.
--.TP
--\fBslurm_spank_local_user_init\fR
--Called in local (\fBsrun\fR) context only after all
--options have been processed.
--This is called after the job ID and step IDs are available.
--This happens in \fBsrun\fR after the allocation is made, but before
--tasks are launched.
--.TP
--\fBslurm_spank_user_init\fR
--Called after privileges are temporarily dropped. (remote context only)
--.TP
--\fBslurm_spank_task_init_privileged\fR
--Called for each task just after fork, but before all elevated privileges
--are dropped. (remote context only)
--.TP
--\fBslurm_spank_task_init\fR
--Called for each task just before execve (2). If you are restricing memory
--with cgroups, memory allocated here will be in the job's cgroup. (remote
--context only)
--.TP
--\fBslurm_spank_task_post_fork\fR
--Called for each task from parent process after fork (2) is complete.
--Due to the fact that \fBslurmd\fR does not exec any tasks until all
--tasks have completed fork (2), this call is guaranteed to run before
--the user task is executed. (remote context only)
--.TP
--\fBslurm_spank_task_exit\fR
--Called for each task as its exit status is collected by Slurm.
--(remote context only)
--.TP
--\fBslurm_spank_exit\fR
--Called once just before \fBslurmstepd\fR exits in remote context.
--In local context, called before \fBsrun\fR exits.
--.TP
--\fBslurm_spank_job_epilog\fR
--Called at the same time as the job epilog. If this function returns a
--negative value and the \fBSPANK\fR plugin that contains it is required in the
--\fBplugstack.conf\fR, the node that this is run on will be drained.
--.TP
--\fBslurm_spank_slurmd_exit\fR
--Called in slurmd when the daemon is shut down.
--.LP
--All of these functions have the same prototype, for example:
--.nf
--
-- int \fBslurm_spank_init\fR (spank_t spank, int ac, char *argv[])
--
--.fi
--.LP
--Where \fBspank\fR is the \fBSPANK\fR handle which must be passed back to
--Slurm when the plugin calls functions like \fBspank_get_item\fR and
--\fBspank_getenv\fR. Configured arguments (See \fBCONFIGURATION\fR
--below) are passed in the argument vector \fBargv\fR with argument
--count \fBac\fR.
--.LP
--\fBSPANK\fR plugins can query the current list of supported slurm_spank
--symbols to determine if the current version supports a given plugin hook.
--This may be useful because the list of plugin symbols may grow in the
--future. The query is done using the \fBspank_symbol_supported\fR function,
--which has the following prototype:
--.nf
--
-- int \fBspank_symbol_supported\fR (const char *sym);
--
--.fi
--.LP
--The return value is 1 if the symbol is supported, 0 if not.
--.LP
--\fBSPANK\fR plugins do not have direct access to internally defined Slurm
--data structures. Instead, information about the currently executing
--job is obtained via the \fBspank_get_item\fR function call.
--.nf
--
-- spank_err_t \fBspank_get_item\fR (spank_t spank, spank_item_t item, ...);
--
--.fi
--The \fBspank_get_item\fR call must be passed the current \fBSPANK\fR
--handle as well as the item requested, which is defined by the
--passed \fBspank_item_t\fR. A variable number of pointer arguments are also
--passed, depending on which item was requested by the plugin. A
--list of the valid values for \fBitem\fR is kept in the \fBspank.h\fR header
--file. Some examples are:
--.TP 2
--\fBS_JOB_UID\fR
--User id for running job. (uid_t *) is third arg of \fBspank_get_item\fR
--.TP
--\fBS_JOB_STEPID\fR
--Job step id for running job. (uint32_t *) is third arg of \fBspank_get_item\fR.
--.TP
--\fBS_TASK_EXIT_STATUS\fR
--Exit status for exited task. Only valid from \fBslurm_spank_task_exit\fR.
--(int *) is third arg of \fBspank_get_item\fR.
--.TP
--\fBS_JOB_ARGV\fR
--Complete job command line. Third and fourth args to \fBspank_get_item\fR
--are (int *, char ***).
--.LP
--See \fBspank.h\fR for more details, and \fBEXAMPLES\fR below for an example
--of \fBspank_get_item\fR usage.
--.LP
--\fBSPANK\fR functions in the \fBlocal\fB and \fBallocator\fR environment should
--use the \fBgetenv\fR, \fBsetenv\fR, and \fBunsetenv\fR functions to view and
--modify the job's environment.
--\fBSPANK\fR functions in the \fBremote\fR environment should use the
--\fBspank_getenv\fR, \fBspank_setenv\fR, and \fBspank_unsetenv\fR functions to
--view and modify the job's environment. \fBspank_getenv\fR
--searches the job's environment for the environment variable
--\fIvar\fR and copies the current value into a buffer \fIbuf\fR
--of length \fIlen\fR. \fBspank_setenv\fR allows a \fBSPANK\fR
--plugin to set or overwrite a variable in the job's environment,
--and \fBspank_unsetenv\fR unsets an environment variable in
--the job's environment. The prototypes are:
--.nf
--
-- spank_err_t \fBspank_getenv\fR (spank_t spank, const char *var,
-- char *buf, int len);
-- spank_err_t \fBspank_setenv\fR (spank_t spank, const char *var,
-- const char *val, int overwrite);
-- spank_err_t \fBspank_unsetenv\fR (spank_t spank, const char *var);
--.fi
--.LP
--These are only necessary in remote context since modifications of
--the standard process environment using \fBsetenv\fR (3), \fBgetenv\fR (3),
--and \fBunsetenv\fR (3) may be used in local context.
--.LP
--Functions are also available from within the \fBSPANK\fR plugins to
--establish environment variables to be exported to the Slurm
--\fBPrologSlurmctld\fR, \fBProlog\fR, \fBEpilog\fR and \fBEpilogSlurmctld\fR
--programs (the so-called \fBjob control\fR environment).
--The name of environment variables established by these calls will be prepended
--with the string \fISPANK_\fR in order to avoid any security implications
--of arbitrary environment variable control. (After all, the job control
--scripts do run as root or the Slurm user.).
--.LP
--These functions are available from \fBlocal\fR context only.
--.nf
--
-- spank_err_t \fBspank_job_control_getenv\fR(spank_t spank, const char *var,
-- char *buf, int len);
-- spank_err_t \fBspank_job_control_setenv\fR(spank_t spank, const char *var,
-- const char *val, int overwrite);
-- spank_err_t \fBspank_job_control_unsetenv\fR(spank_t spank, const char *var);
--.fi
--.LP
--See \fBspank.h\fR for more information, and \fBEXAMPLES\fR below for an example
--for \fBspank_getenv\fR usage.
--.LP
--Many of the described \fBSPANK\fR functions available to plugins return
--errors via the \fBspank_err_t\fR error type. On success, the return value
--will be set to \fBESPANK_SUCCESS\fR, while on failure, the return value
--will be set to one of many error values defined in slurm/spank.h. The
--\fBSPANK\fR interface provides a simple function
--.nf
--
-- const char * \fBspank_strerror\fR(spank_err_t err);
--
--.fi
--which may be used to translate a \fBspank_err_t\fR value into its
--string representation.
--
--.LP
--The \fBslurm_spank_log\fR function can be used to print messages back to the
--user at an error level. This is to keep users from having to rely on the
--\fBslurm_error\fR function, which can be confusing because it prepends
--"\fBerror:\fR" to every message.
--
--.SH "SPANK OPTIONS"
--.LP
--SPANK plugins also have an interface through which they may define
--and implement extra job options. These options are made available to
--the user through Slurm commands such as \fBsrun\fR(1), \fBsalloc\fR(1),
--and \fBsbatch\fR(1). If the option is specified by the user, its value is
--forwarded and registered with the plugin in slurmd when the job is run.
--In this way, \fBSPANK\fR plugins may dynamically provide new options and
--functionality to Slurm.
--.LP
--Each option registered by a plugin to Slurm takes the form of
--a \fBstruct spank_option\fR which is declared in \fB<slurm/spank.h>\fR as
--.nf
--
-- struct spank_option {
-- char * name;
-- char * arginfo;
-- char * usage;
-- int has_arg;
-- int val;
-- spank_opt_cb_f cb;
-- };
--
--.fi
--
--Where
--.TP
--.I name
--is the name of the option. Its length is limited to \fBSPANK_OPTION_MAXLEN\fR
--defined in \fB<slurm/spank.h>\fR.
--.TP
--.I arginfo
--is a description of the argument to the option, if the option does take
--an argument.
--.TP
--.I usage
--is a short description of the option suitable for \-\-help output.
--.TP
--.I has_arg
--0 if option takes no argument, 1 if option takes an argument, and
--2 if the option takes an optional argument. (See \fBgetopt_long\fR (3)).
--.TP
--.I val
--A plugin\-local value to return to the option callback function.
--.TP
--.I cb
--A callback function that is invoked when the plugin option is
--registered with Slurm. \fBspank_opt_cb_f\fR is typedef'd in
--\fB<slurm/spank.h>\fR as
--.nf
--
-- typedef int (*spank_opt_cb_f) (int val, const char *optarg,
-- int remote);
--
--.fi
--Where \fIval\fR is the value of the \fIval\fR field in the \fBspank_option\fR
--struct, \fIoptarg\fR is the supplied argument if applicable, and \fIremote\fR
--is 0 if the function is being called from the "local" host (e.g. host where
--\fBsrun\fR or \fBsbatch/salloc\fR are invoked) or 1 from the "remote" host
--(host where slurmd/slurmstepd run) but only executed by \fBslurmstepd\fR
--(remote context) if the option was registered for such context.
--.LP
--Plugin options may be registered with Slurm using
--the \fBspank_option_register\fR function. This function is only valid
--when called from the plugin's \fBslurm_spank_init\fR handler, and
--registers one option at a time. The prototype is
--.nf
--
-- spank_err_t spank_option_register (spank_t sp,
-- struct spank_option *opt);
--
--.fi
--This function will return \fBESPANK_SUCCESS\fR on successful registration
--of an option, or \fBESPANK_BAD_ARG\fR for errors including invalid spank_t
--handle, or when the function is not called from the \fBslurm_spank_init\fR
--function. All options need to be registered from all contexts in which
--they will be used. For instance, if an option is only used in local (srun)
--and remote (slurmd) contexts, then \fBspank_option_register\fR
--should only be called from within those contexts. For example:
--.nf
--
-- if (spank_context() != S_CTX_ALLOCATOR)
-- spank_option_register (sp, opt);
--
--.fi
--If, however, the option is used in all contexts, the \fBspank_option_register\fR
--needs to be called everywhere.
--.LP
--In addition to \fBspank_option_register\fR, plugins may also export options
--to Slurm by defining a table of \fBstruct spank_option\fR with the
--symbol name \fBspank_options\fR. This method, however, is not supported
--for use with \fBsbatch\fR and \fBsalloc\fR (allocator context), thus
--the use of \fBspank_option_register\fR is preferred. When using the
--\fBspank_options\fR table, the final element in the array must be
--filled with zeros. A \fBSPANK_OPTIONS_TABLE_END\fR macro is provided
--in \fB<slurm/spank.h>\fR for this purpose.
--.LP
--When an option is provided by the user on the local side, either by command line
--options or by environment variables, \fBSlurm\fR will immediately invoke the
--option's callback with \fIremote\fR=0. This is meant for the plugin to do local
--sanity checking of the option before the value is sent to the remote side during
--job launch. If the argument the user specified is invalid, the plugin should
--issue an error and issue a non\-zero return code from the callback. The plugin
--should be able to handle cases where the spank option is set multiple times
--through environment variables and command line options. Environment variables
--are processed before command line options.
--.LP
--On the remote side, options and their arguments are registered just
--after \fBSPANK\fR plugins are loaded and before the \fBspank_init\fR
--handler is called. This allows plugins to modify behavior of all plugin
--functionality based on the value of user\-provided options.
--(See EXAMPLES below for a plugin that registers an option with \fBSlurm\fR).
--.LP
--As an alternative to use of an option callback and global variable,
--plugins can use the \fBspank_option_getopt\fR option to check for
--supplied options after option processing. This function has the prototype:
--.nf
--
-- spank_err_t spank_option_getopt(spank_t sp,
-- struct spank_option *opt, char **optargp);
--
--.nf
--This function returns \fBESPANK_SUCCESS\fR if the option defined in the
--struct spank_option \fIopt\fR has been used by the user. If \fIoptargp\fR
--is non-NULL then it is set to any option argument passed (if the option
--takes an argument). The use of this method is \fIrequired\fR to process
--options in \fBjob_script\fR context (\fBslurm_spank_job_prolog\fR and
--\fBslurm_spank_job_epilog\fR). This function is valid in the following contexts:
--slurm_spank_job_prolog, slurm_spank_local_user_init, slurm_spank_user_init,
--slurm_spank_task_init_privileged, slurm_spank_task_init, slurm_spank_task_exit,
--and slurm_spank_job_epilog.
--
--.SH "CONFIGURATION"
--.LP
--The default \fBSPANK\fR plug\-in stack configuration file is
--\fBplugstack.conf\fR in the same directory as \fBslurm.conf\fR(5),
--though this may be changed via the Slurm config parameter
--\fIPlugStackConfig\fR. Normally the \fBplugstack.conf\fR file
--should be identical on all nodes of the cluster.
--The config file lists \fBSPANK\fR plugins,
--one per line, along with whether the plugin is \fIrequired\fR or
--\fIoptional\fR, and any global arguments that are to be passed to
--the plugin for runtime configuration. Comments are preceded with '#'
--and extend to the end of the line. If the configuration file
--is missing or empty, it will simply be ignored.
--.LP
--The format of each non\-comment line in the configuration file is:
--\fB
--.nf
--
-- required/optional plugin arguments
--
--.fi
--\fR For example:
--.nf
--
-- optional /usr/lib/slurm/test.so
--
--.fi
--Tells \fBslurmd\fR to load the plugin \fBtest.so\fR passing no arguments.
--If a \fBSPANK\fR plugin is \fIrequired\fR, then failure of any of the
--plugin's functions will cause \fBslurmd\fR to terminate the job, while
--\fIoptional\fR plugins only cause a warning.
--.LP
--If a fully\-qualified path is not specified for a plugin, then the
--currently configured \fIPluginDir\fR in \fBslurm.conf\fR(5) is searched.
--.LP
--\fBSPANK\fR plugins are stackable, meaning that more than one plugin may
--be placed into the config file. The plugins will simply be called
--in order, one after the other, and appropriate action taken on
--failure given that state of the plugin's \fIoptional\fR flag.
--.LP
--Additional config files or directories of config files may be included
--in \fBplugstack.conf\fR with the \fBinclude\fR keyword. The \fBinclude\fR
--keyword must appear on its own line, and takes a glob as its parameter,
--so multiple files may be included from one \fBinclude\fR line. For
--example, the following syntax will load all config files in the
--/etc/slurm/plugstack.conf.d directory, in local collation order:
--.nf
--
-- include /etc/slurm/plugstack.conf.d/*
--
--.fi
--which might be considered a more flexible method for building up
--a spank plugin stack.
--.LP
--The \fBSPANK\fR config file is re\-read on each job launch, so editing
--the config file will not affect running jobs. However care should
--be taken so that a partially edited config file is not read by a
--launching job.
--
--.SH "EXAMPLES"
--.LP
--Simple \fBSPANK\fR config file:
--.nf
--
--#
--# SPANK config file
--#
--# required? plugin args
--#
--optional renice.so min_prio=\-10
--required /usr/lib/slurm/test.so
--
--.fi
--.LP
--The following is a simple \fBSPANK\fR plugin to modify the nice value
--of job tasks. This plugin adds a \-\-renice=[prio] option to \fBsrun\fR
--which users can use to set the priority of all remote tasks. Priority may
--also be specified via a SLURM_RENICE environment variable. A minimum
--priority may be established via a "min_prio" parameter in \fBplugstack.conf\fR
--(See above for example).
--.nf
--
--/*
-- * To compile:
-- * gcc \-shared \-o renice.so renice.c
-- *
-- */
--#include <sys/types.h>
--#include <stdio.h>
--#include <stdlib.h>
--#include <unistd.h>
--#include <string.h>
--#include <sys/resource.h>
--
--#include <slurm/spank.h>
--
--/*
-- * All spank plugins must define this macro for the
-- * Slurm plugin loader.
-- */
--SPANK_PLUGIN(renice, 1);
--
--#define PRIO_ENV_VAR "SLURM_RENICE"
--#define PRIO_NOT_SET 42
--
--/*
-- * Minimum allowable value for priority. May be
-- * set globally via plugin option min_prio=<prio>
-- */
--static int min_prio = \-20;
--
--static int prio = PRIO_NOT_SET;
--
--static int _renice_opt_process (int val,
-- const char *optarg,
-- int remote);
--static int _str2prio (const char *str, int *p2int);
--
--/*
-- * Provide a \-\-renice=[prio] option to srun:
-- */
--struct spank_option spank_options[] =
--{
-- { "renice", "[prio]",
-- "Re\-nice job tasks to priority [prio].", 2, 0,
-- (spank_opt_cb_f) _renice_opt_process
-- },
-- SPANK_OPTIONS_TABLE_END
--};
--
--/*
-- * Called from both srun and slurmd.
-- */
--int slurm_spank_init (spank_t sp, int ac, char **av)
--{
-- int i;
--
-- /* Don't do anything in sbatch/salloc */
-- if (spank_context () == S_CTX_ALLOCATOR)
-- return (0);
--
-- for (i = 0; i < ac; i++) {
-- if (strncmp ("min_prio=", av[i], 9) == 0) {
-- const char *optarg = av[i] + 9;
-- if (_str2prio (optarg, &min_prio) < 0)
-- slurm_error ("Ignoring invalid min_prio value: %s",
-- av[i]);
-- } else {
-- slurm_error ("renice: Invalid option: %s", av[i]);
-- }
-- }
--
-- if (!spank_remote (sp))
-- slurm_verbose ("renice: min_prio = %d", min_prio);
--
-- return (0);
--}
--
--
--int slurm_spank_task_post_fork (spank_t sp, int ac, char **av)
--{
-- pid_t pid;
-- int taskid;
--
-- if (prio == PRIO_NOT_SET) {
-- /* See if SLURM_RENICE env var is set by user */
-- char val [1024];
--
-- if (spank_getenv (sp, PRIO_ENV_VAR, val, 1024)
-- != ESPANK_SUCCESS)
-- return (0);
--
-- if (_str2prio (val, &prio) < 0) {
-- slurm_error ("Bad value for %s: %s",
-- PRIO_ENV_VAR, optarg);
-- return (\-1);
-- }
--
-- if (prio < min_prio) {
-- slurm_error ("%s=%d not allowed, using min=%d",
-- PRIO_ENV_VAR, prio, min_prio);
-- }
-- }
--
-- if (prio < min_prio)
-- prio = min_prio;
--
-- spank_get_item (sp, S_TASK_GLOBAL_ID, &taskid);
-- spank_get_item (sp, S_TASK_PID, &pid);
--
-- slurm_info ("re\-nicing task%d pid %ld to %ld",
-- taskid, pid, prio);
--
-- if (setpriority (PRIO_PROCESS, (int) pid,
-- (int) prio) < 0) {
-- slurm_error ("setpriority: %m");
-- return (\-1);
-- }
--
-- return (0);
--}
--
--static int _str2prio (const char *str, int *p2int)
--{
-- long int l;
-- char *p;
--
-- l = strtol (str, &p, 10);
-- if ((*p != '\0') || (l < \-20) || (l > 20))
-- return (\-1);
--
-- *p2int = (int) l;
--
-- return (0);
--}
--
--static int _renice_opt_process (int val,
-- const char *optarg,
-- int remote)
--{
-- if (optarg == NULL) {
-- slurm_error ("renice: invalid argument!");
-- return (\-1);
-- }
--
-- if (_str2prio (optarg, &prio) < 0) {
-- slurm_error ("Bad value for \-\-renice: %s",
-- optarg);
-- return (\-1);
-- }
--
-- if (prio < min_prio) {
-- slurm_error ("\-\-renice=%d not allowed, will use min=%d",
-- prio, min_prio);
-- }
--
-- return (0);
--}
--
--.fi
--
--.SH "COPYING"
--Portions copyright (C) 2010-2018 SchedMD LLC.
--Copyright (C) 2006 The Regents of the University of California.
--Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
--CODE\-OCEC\-09\-009. All rights reserved.
--.LP
--This file is part of Slurm, a resource management program.
--For details, see <https://slurm.schedmd.com/>.
--.LP
--Slurm is free software; you can redistribute it and/or modify it under
--the terms of the GNU General Public License as published by the Free
--Software Foundation; either version 2 of the License, or (at your option)
--any later version.
--.LP
--Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
--WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
--FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
--details.
--.SH "FILES"
--\fB/etc/slurm/slurm.conf\fR \- Slurm configuration file.
--.br
--\fB/etc/slurm/plugstack.conf\fR \- SPANK configuration file.
--.br
--\fB/usr/include/slurm/spank.h\fR \- SPANK header file.
--.SH "SEE ALSO"
--.LP
--\fBsrun\fR(1), \fBslurm.conf\fR(5)
---- /dev/null
+++ b/doc/man/man7/spank.7
-@@ -0,0 +1,656 @@
-+.TH SPANK "7" "Slurm Component" "April 2020" "Slurm Component"
-+
-+.SH "NAME"
-+\fBSPANK\fR \- Slurm Plug\-in Architecture for Node and job (K)control
-+
-+.SH "DESCRIPTION"
-+This manual briefly describes the capabilities of the Slurm Plug\-in
-+architecture for Node and job Kontrol (\fBSPANK\fR) as well as the \fBSPANK\fR
-+configuration file: (By default: \fBplugstack.conf\fP.)
-+.LP
-+\fBSPANK\fR provides a very generic interface for stackable plug\-ins
-+which may be used to dynamically modify the job launch code in
-+Slurm. \fBSPANK\fR plugins may be built without access to Slurm source
-+code. They need only be compiled against Slurm's \fBspank.h\fR header file,
-+added to the \fBSPANK\fR config file \fBplugstack.conf\fR,
-+and they will be loaded at runtime during the next job launch. Thus,
-+the \fBSPANK\fR infrastructure provides administrators and other developers
-+a low cost, low effort ability to dynamically modify the runtime
-+behavior of Slurm job launch.
-+.LP
-+\fBNote\fR: \fBSPANK\fR plugins using the Slurm APIs need to be recompiled when
-+upgrading Slurm to a new major release.
-+.LP
-+
-+.SH "SPANK PLUGINS"
-+\fBSPANK\fR plugins are loaded in up to five separate contexts during a
-+\fBSlurm\fR job. Briefly, the five contexts are:
-+.TP 8
-+\fBlocal\fB
-+In \fBlocal\fR context, the plugin is loaded by \fBsrun\fR. (i.e. the "local"
-+part of a parallel job).
-+.TP
-+\fBremote\fR
-+In \fBremote\fR context, the plugin is loaded by \fBslurmstepd\fR. (i.e. the "remote"
-+part of a parallel job).
-+.TP
-+\fBallocator\fR
-+In \fBallocator\fR context, the plugin is loaded in one of the job allocation
-+utilities \fBsbatch\fR or \fBsalloc\fR.
-+.LP
-+.TP
-+\fBslurmd\fR In \fBslurmd\fR context, the plugin is loaded in the
-+\fBslurmd\fR daemon itself. \fBNote\fR: Plugins loaded in slurmd context
-+persist for the entire time slurmd is running, so if configuration is
-+changed or plugins are updated, slurmd must be restarted for the changes
-+to take effect.
-+.LP
-+.TP
-+\fBjob_script\fR
-+In the \fBjob_script\fR context, plugins are loaded in the context of the
-+job prolog or epilog. \fBNote\fR: Plugins are loaded in \fBjob_script\fR
-+context on each run on the job prolog or epilog, in a separate address
-+space from plugins in \fBslurmd\fR context. This means there is no
-+state shared between this context and other contexts, or even between
-+one call to \fBslurm_spank_job_prolog\fR or \fBslurm_spank_job_epilog\fR
-+and subsequent calls.
-+.LP
-+In local context, only the \fBinit\fR, \fBexit\fR, \fBinit_post_opt\fR, and
-+\fBlocal_user_init\fR functions are called. In allocator context, only the
-+\fBinit\fR, \fBexit\fR, and \fBinit_post_opt\fR functions are called.
-+Similarly, in slurmd context, only the \fBinit\fR and \fBslurmd_exit\fR
-+callbacks are active, and in the job_script context, only the \fBjob_prolog\fR
-+and \fBjob_epilog\fR callbacks are used.
-+Plugins may query the context in which they are running with the
-+\fBspank_context\fR and \fBspank_remote\fR functions defined in
-+\fB<slurm/spank.h>\fR.
-+.LP
-+\fBSPANK\fR plugins may be called from multiple points during the Slurm job
-+launch. A plugin may define the following functions:
-+.TP 2
-+\fBslurm_spank_init\fR
-+Called just after plugins are loaded. In remote context, this is just
-+after job step is initialized. This function is called before any plugin
-+option processing.
-+.TP
-+\fBslurm_spank_job_prolog\fR
-+Called at the same time as the job prolog. If this function returns a
-+negative value and the \fBSPANK\fR plugin that contains it is required in the
-+\fBplugstack.conf\fR, the node that this is run on will be drained.
-+
-+.TP
-+\fBslurm_spank_init_post_opt\fR
-+Called at the same point as \fBslurm_spank_init\fR, but after all
-+user options to the plugin have been processed. The reason that the
-+\fBinit\fR and \fBinit_post_opt\fR callbacks are separated is so that
-+plugins can process system-wide options specified in plugstack.conf in
-+the \fBinit\fR callback, then process user options, and finally take some
-+action in \fBslurm_spank_init_post_opt\fR if necessary.
-+In the case of a heterogeneous job, \fBslurm_spank_init\fR is invoked once
-+per job component.
-+.TP
-+\fBslurm_spank_local_user_init\fR
-+Called in local (\fBsrun\fR) context only after all
-+options have been processed.
-+This is called after the job ID and step IDs are available.
-+This happens in \fBsrun\fR after the allocation is made, but before
-+tasks are launched.
-+.TP
-+\fBslurm_spank_user_init\fR
-+Called after privileges are temporarily dropped. (remote context only)
-+.TP
-+\fBslurm_spank_task_init_privileged\fR
-+Called for each task just after fork, but before all elevated privileges
-+are dropped. (remote context only)
-+.TP
-+\fBslurm_spank_task_init\fR
-+Called for each task just before execve (2). If you are restricing memory
-+with cgroups, memory allocated here will be in the job's cgroup. (remote
-+context only)
-+.TP
-+\fBslurm_spank_task_post_fork\fR
-+Called for each task from parent process after fork (2) is complete.
-+Due to the fact that \fBslurmd\fR does not exec any tasks until all
-+tasks have completed fork (2), this call is guaranteed to run before
-+the user task is executed. (remote context only)
-+.TP
-+\fBslurm_spank_task_exit\fR
-+Called for each task as its exit status is collected by Slurm.
-+(remote context only)
-+.TP
-+\fBslurm_spank_exit\fR
-+Called once just before \fBslurmstepd\fR exits in remote context.
-+In local context, called before \fBsrun\fR exits.
-+.TP
-+\fBslurm_spank_job_epilog\fR
-+Called at the same time as the job epilog. If this function returns a
-+negative value and the \fBSPANK\fR plugin that contains it is required in the
-+\fBplugstack.conf\fR, the node that this is run on will be drained.
-+.TP
-+\fBslurm_spank_slurmd_exit\fR
-+Called in slurmd when the daemon is shut down.
-+.LP
-+All of these functions have the same prototype, for example:
-+.nf
-+
-+ int \fBslurm_spank_init\fR (spank_t spank, int ac, char *argv[])
-+
-+.fi
-+.LP
-+Where \fBspank\fR is the \fBSPANK\fR handle which must be passed back to
-+Slurm when the plugin calls functions like \fBspank_get_item\fR and
-+\fBspank_getenv\fR. Configured arguments (See \fBCONFIGURATION\fR
-+below) are passed in the argument vector \fBargv\fR with argument
-+count \fBac\fR.
-+.LP
-+\fBSPANK\fR plugins can query the current list of supported slurm_spank
-+symbols to determine if the current version supports a given plugin hook.
-+This may be useful because the list of plugin symbols may grow in the
-+future. The query is done using the \fBspank_symbol_supported\fR function,
-+which has the following prototype:
-+.nf
-+
-+ int \fBspank_symbol_supported\fR (const char *sym);
-+
-+.fi
-+.LP
-+The return value is 1 if the symbol is supported, 0 if not.
-+.LP
-+\fBSPANK\fR plugins do not have direct access to internally defined Slurm
-+data structures. Instead, information about the currently executing
-+job is obtained via the \fBspank_get_item\fR function call.
-+.nf
-+
-+ spank_err_t \fBspank_get_item\fR (spank_t spank, spank_item_t item, ...);
-+
-+.fi
-+The \fBspank_get_item\fR call must be passed the current \fBSPANK\fR
-+handle as well as the item requested, which is defined by the
-+passed \fBspank_item_t\fR. A variable number of pointer arguments are also
-+passed, depending on which item was requested by the plugin. A
-+list of the valid values for \fBitem\fR is kept in the \fBspank.h\fR header
-+file. Some examples are:
-+.TP 2
-+\fBS_JOB_UID\fR
-+User id for running job. (uid_t *) is third arg of \fBspank_get_item\fR
-+.TP
-+\fBS_JOB_STEPID\fR
-+Job step id for running job. (uint32_t *) is third arg of \fBspank_get_item\fR.
-+.TP
-+\fBS_TASK_EXIT_STATUS\fR
-+Exit status for exited task. Only valid from \fBslurm_spank_task_exit\fR.
-+(int *) is third arg of \fBspank_get_item\fR.
-+.TP
-+\fBS_JOB_ARGV\fR
-+Complete job command line. Third and fourth args to \fBspank_get_item\fR
-+are (int *, char ***).
-+.LP
-+See \fBspank.h\fR for more details, and \fBEXAMPLES\fR below for an example
-+of \fBspank_get_item\fR usage.
-+.LP
-+\fBSPANK\fR functions in the \fBlocal\fB and \fBallocator\fR environment should
-+use the \fBgetenv\fR, \fBsetenv\fR, and \fBunsetenv\fR functions to view and
-+modify the job's environment.
-+\fBSPANK\fR functions in the \fBremote\fR environment should use the
-+\fBspank_getenv\fR, \fBspank_setenv\fR, and \fBspank_unsetenv\fR functions to
-+view and modify the job's environment. \fBspank_getenv\fR
-+searches the job's environment for the environment variable
-+\fIvar\fR and copies the current value into a buffer \fIbuf\fR
-+of length \fIlen\fR. \fBspank_setenv\fR allows a \fBSPANK\fR
-+plugin to set or overwrite a variable in the job's environment,
-+and \fBspank_unsetenv\fR unsets an environment variable in
-+the job's environment. The prototypes are:
-+.nf
-+
-+ spank_err_t \fBspank_getenv\fR (spank_t spank, const char *var,
-+ char *buf, int len);
-+ spank_err_t \fBspank_setenv\fR (spank_t spank, const char *var,
-+ const char *val, int overwrite);
-+ spank_err_t \fBspank_unsetenv\fR (spank_t spank, const char *var);
-+.fi
-+.LP
-+These are only necessary in remote context since modifications of
-+the standard process environment using \fBsetenv\fR (3), \fBgetenv\fR (3),
-+and \fBunsetenv\fR (3) may be used in local context.
-+.LP
-+Functions are also available from within the \fBSPANK\fR plugins to
-+establish environment variables to be exported to the Slurm
-+\fBPrologSlurmctld\fR, \fBProlog\fR, \fBEpilog\fR and \fBEpilogSlurmctld\fR
-+programs (the so-called \fBjob control\fR environment).
-+The name of environment variables established by these calls will be prepended
-+with the string \fISPANK_\fR in order to avoid any security implications
-+of arbitrary environment variable control. (After all, the job control
-+scripts do run as root or the Slurm user.).
-+.LP
-+These functions are available from \fBlocal\fR context only.
-+.nf
-+
-+ spank_err_t \fBspank_job_control_getenv\fR(spank_t spank, const char *var,
-+ char *buf, int len);
-+ spank_err_t \fBspank_job_control_setenv\fR(spank_t spank, const char *var,
-+ const char *val, int overwrite);
-+ spank_err_t \fBspank_job_control_unsetenv\fR(spank_t spank, const char *var);
-+.fi
-+.LP
-+See \fBspank.h\fR for more information, and \fBEXAMPLES\fR below for an example
-+for \fBspank_getenv\fR usage.
-+.LP
-+Many of the described \fBSPANK\fR functions available to plugins return
-+errors via the \fBspank_err_t\fR error type. On success, the return value
-+will be set to \fBESPANK_SUCCESS\fR, while on failure, the return value
-+will be set to one of many error values defined in slurm/spank.h. The
-+\fBSPANK\fR interface provides a simple function
-+.nf
-+
-+ const char * \fBspank_strerror\fR(spank_err_t err);
-+
-+.fi
-+which may be used to translate a \fBspank_err_t\fR value into its
-+string representation.
-+
-+.LP
-+The \fBslurm_spank_log\fR function can be used to print messages back to the
-+user at an error level. This is to keep users from having to rely on the
-+\fBslurm_error\fR function, which can be confusing because it prepends
-+"\fBerror:\fR" to every message.
-+
-+.SH "SPANK OPTIONS"
-+.LP
-+SPANK plugins also have an interface through which they may define
-+and implement extra job options. These options are made available to
-+the user through Slurm commands such as \fBsrun\fR(1), \fBsalloc\fR(1),
-+and \fBsbatch\fR(1). If the option is specified by the user, its value is
-+forwarded and registered with the plugin in slurmd when the job is run.
-+In this way, \fBSPANK\fR plugins may dynamically provide new options and
-+functionality to Slurm.
-+.LP
-+Each option registered by a plugin to Slurm takes the form of
-+a \fBstruct spank_option\fR which is declared in \fB<slurm/spank.h>\fR as
-+.nf
-+
-+ struct spank_option {
-+ char * name;
-+ char * arginfo;
-+ char * usage;
-+ int has_arg;
-+ int val;
-+ spank_opt_cb_f cb;
-+ };
-+
-+.fi
-+
-+Where
-+.TP
-+.I name
-+is the name of the option. Its length is limited to \fBSPANK_OPTION_MAXLEN\fR
-+defined in \fB<slurm/spank.h>\fR.
-+.TP
-+.I arginfo
-+is a description of the argument to the option, if the option does take
-+an argument.
-+.TP
-+.I usage
-+is a short description of the option suitable for \-\-help output.
-+.TP
-+.I has_arg
-+0 if option takes no argument, 1 if option takes an argument, and
-+2 if the option takes an optional argument. (See \fBgetopt_long\fR (3)).
-+.TP
-+.I val
-+A plugin\-local value to return to the option callback function.
-+.TP
-+.I cb
-+A callback function that is invoked when the plugin option is
-+registered with Slurm. \fBspank_opt_cb_f\fR is typedef'd in
-+\fB<slurm/spank.h>\fR as
-+.nf
-+
-+ typedef int (*spank_opt_cb_f) (int val, const char *optarg,
-+ int remote);
-+
-+.fi
-+Where \fIval\fR is the value of the \fIval\fR field in the \fBspank_option\fR
-+struct, \fIoptarg\fR is the supplied argument if applicable, and \fIremote\fR
-+is 0 if the function is being called from the "local" host (e.g. host where
-+\fBsrun\fR or \fBsbatch/salloc\fR are invoked) or 1 from the "remote" host
-+(host where slurmd/slurmstepd run) but only executed by \fBslurmstepd\fR
-+(remote context) if the option was registered for such context.
-+.LP
-+Plugin options may be registered with Slurm using
-+the \fBspank_option_register\fR function. This function is only valid
-+when called from the plugin's \fBslurm_spank_init\fR handler, and
-+registers one option at a time. The prototype is
-+.nf
-+
-+ spank_err_t spank_option_register (spank_t sp,
-+ struct spank_option *opt);
-+
-+.fi
-+This function will return \fBESPANK_SUCCESS\fR on successful registration
-+of an option, or \fBESPANK_BAD_ARG\fR for errors including invalid spank_t
-+handle, or when the function is not called from the \fBslurm_spank_init\fR
-+function. All options need to be registered from all contexts in which
-+they will be used. For instance, if an option is only used in local (srun)
-+and remote (slurmd) contexts, then \fBspank_option_register\fR
-+should only be called from within those contexts. For example:
-+.nf
-+
-+ if (spank_context() != S_CTX_ALLOCATOR)
-+ spank_option_register (sp, opt);
-+
-+.fi
-+If, however, the option is used in all contexts, the \fBspank_option_register\fR
-+needs to be called everywhere.
-+.LP
-+In addition to \fBspank_option_register\fR, plugins may also export options
-+to Slurm by defining a table of \fBstruct spank_option\fR with the
-+symbol name \fBspank_options\fR. This method, however, is not supported
-+for use with \fBsbatch\fR and \fBsalloc\fR (allocator context), thus
-+the use of \fBspank_option_register\fR is preferred. When using the
-+\fBspank_options\fR table, the final element in the array must be
-+filled with zeros. A \fBSPANK_OPTIONS_TABLE_END\fR macro is provided
-+in \fB<slurm/spank.h>\fR for this purpose.
-+.LP
-+When an option is provided by the user on the local side, either by command line
-+options or by environment variables, \fBSlurm\fR will immediately invoke the
-+option's callback with \fIremote\fR=0. This is meant for the plugin to do local
-+sanity checking of the option before the value is sent to the remote side during
-+job launch. If the argument the user specified is invalid, the plugin should
-+issue an error and issue a non\-zero return code from the callback. The plugin
-+should be able to handle cases where the spank option is set multiple times
-+through environment variables and command line options. Environment variables
-+are processed before command line options.
-+.LP
-+On the remote side, options and their arguments are registered just
-+after \fBSPANK\fR plugins are loaded and before the \fBspank_init\fR
-+handler is called. This allows plugins to modify behavior of all plugin
-+functionality based on the value of user\-provided options.
-+(See EXAMPLES below for a plugin that registers an option with \fBSlurm\fR).
-+.LP
-+As an alternative to use of an option callback and global variable,
-+plugins can use the \fBspank_option_getopt\fR option to check for
-+supplied options after option processing. This function has the prototype:
-+.nf
-+
-+ spank_err_t spank_option_getopt(spank_t sp,
-+ struct spank_option *opt, char **optargp);
-+
-+.nf
-+This function returns \fBESPANK_SUCCESS\fR if the option defined in the
-+struct spank_option \fIopt\fR has been used by the user. If \fIoptargp\fR
-+is non-NULL then it is set to any option argument passed (if the option
-+takes an argument). The use of this method is \fIrequired\fR to process
-+options in \fBjob_script\fR context (\fBslurm_spank_job_prolog\fR and
-+\fBslurm_spank_job_epilog\fR). This function is valid in the following contexts:
-+slurm_spank_job_prolog, slurm_spank_local_user_init, slurm_spank_user_init,
-+slurm_spank_task_init_privileged, slurm_spank_task_init, slurm_spank_task_exit,
-+and slurm_spank_job_epilog.
-+
-+.SH "CONFIGURATION"
-+.LP
-+The default \fBSPANK\fR plug\-in stack configuration file is
-+\fBplugstack.conf\fR in the same directory as \fBslurm.conf\fR(5),
-+though this may be changed via the Slurm config parameter
-+\fIPlugStackConfig\fR. Normally the \fBplugstack.conf\fR file
-+should be identical on all nodes of the cluster.
-+The config file lists \fBSPANK\fR plugins,
-+one per line, along with whether the plugin is \fIrequired\fR or
-+\fIoptional\fR, and any global arguments that are to be passed to
-+the plugin for runtime configuration. Comments are preceded with '#'
-+and extend to the end of the line. If the configuration file
-+is missing or empty, it will simply be ignored.
-+.LP
-+The format of each non\-comment line in the configuration file is:
-+\fB
-+.nf
-+
-+ required/optional plugin arguments
-+
-+.fi
-+\fR For example:
-+.nf
-+
-+ optional /usr/lib/slurm/test.so
-+
-+.fi
-+Tells \fBslurmd\fR to load the plugin \fBtest.so\fR passing no arguments.
-+If a \fBSPANK\fR plugin is \fIrequired\fR, then failure of any of the
-+plugin's functions will cause \fBslurmd\fR to terminate the job, while
-+\fIoptional\fR plugins only cause a warning.
-+.LP
-+If a fully\-qualified path is not specified for a plugin, then the
-+currently configured \fIPluginDir\fR in \fBslurm.conf\fR(5) is searched.
-+.LP
-+\fBSPANK\fR plugins are stackable, meaning that more than one plugin may
-+be placed into the config file. The plugins will simply be called
-+in order, one after the other, and appropriate action taken on
-+failure given that state of the plugin's \fIoptional\fR flag.
-+.LP
-+Additional config files or directories of config files may be included
-+in \fBplugstack.conf\fR with the \fBinclude\fR keyword. The \fBinclude\fR
-+keyword must appear on its own line, and takes a glob as its parameter,
-+so multiple files may be included from one \fBinclude\fR line. For
-+example, the following syntax will load all config files in the
-+/etc/slurm/plugstack.conf.d directory, in local collation order:
-+.nf
-+
-+ include /etc/slurm/plugstack.conf.d/*
-+
-+.fi
-+which might be considered a more flexible method for building up
-+a spank plugin stack.
-+.LP
-+The \fBSPANK\fR config file is re\-read on each job launch, so editing
-+the config file will not affect running jobs. However care should
-+be taken so that a partially edited config file is not read by a
-+launching job.
-+
-+.SH "EXAMPLES"
-+.LP
-+Simple \fBSPANK\fR config file:
-+.nf
-+
-+#
-+# SPANK config file
-+#
-+# required? plugin args
-+#
-+optional renice.so min_prio=\-10
-+required /usr/lib/slurm/test.so
-+
-+.fi
-+.LP
-+The following is a simple \fBSPANK\fR plugin to modify the nice value
-+of job tasks. This plugin adds a \-\-renice=[prio] option to \fBsrun\fR
-+which users can use to set the priority of all remote tasks. Priority may
-+also be specified via a SLURM_RENICE environment variable. A minimum
-+priority may be established via a "min_prio" parameter in \fBplugstack.conf\fR
-+(See above for example).
-+.nf
-+
-+/*
-+ * To compile:
-+ * gcc \-shared \-o renice.so renice.c
-+ *
-+ */
-+#include <sys/types.h>
-+#include <stdio.h>
-+#include <stdlib.h>
-+#include <unistd.h>
-+#include <string.h>
-+#include <sys/resource.h>
-+
-+#include <slurm/spank.h>
-+
-+/*
-+ * All spank plugins must define this macro for the
-+ * Slurm plugin loader.
-+ */
-+SPANK_PLUGIN(renice, 1);
-+
-+#define PRIO_ENV_VAR "SLURM_RENICE"
-+#define PRIO_NOT_SET 42
-+
-+/*
-+ * Minimum allowable value for priority. May be
-+ * set globally via plugin option min_prio=<prio>
-+ */
-+static int min_prio = \-20;
-+
-+static int prio = PRIO_NOT_SET;
-+
-+static int _renice_opt_process (int val,
-+ const char *optarg,
-+ int remote);
-+static int _str2prio (const char *str, int *p2int);
-+
-+/*
-+ * Provide a \-\-renice=[prio] option to srun:
-+ */
-+struct spank_option spank_options[] =
-+{
-+ { "renice", "[prio]",
-+ "Re\-nice job tasks to priority [prio].", 2, 0,
-+ (spank_opt_cb_f) _renice_opt_process
-+ },
-+ SPANK_OPTIONS_TABLE_END
-+};
-+
-+/*
-+ * Called from both srun and slurmd.
-+ */
-+int slurm_spank_init (spank_t sp, int ac, char **av)
-+{
-+ int i;
-+
-+ /* Don't do anything in sbatch/salloc */
-+ if (spank_context () == S_CTX_ALLOCATOR)
-+ return (0);
-+
-+ for (i = 0; i < ac; i++) {
-+ if (strncmp ("min_prio=", av[i], 9) == 0) {
-+ const char *optarg = av[i] + 9;
-+ if (_str2prio (optarg, &min_prio) < 0)
-+ slurm_error ("Ignoring invalid min_prio value: %s",
-+ av[i]);
-+ } else {
-+ slurm_error ("renice: Invalid option: %s", av[i]);
-+ }
-+ }
-+
-+ if (!spank_remote (sp))
-+ slurm_verbose ("renice: min_prio = %d", min_prio);
-+
-+ return (0);
-+}
-+
-+
-+int slurm_spank_task_post_fork (spank_t sp, int ac, char **av)
-+{
-+ pid_t pid;
-+ int taskid;
-+
-+ if (prio == PRIO_NOT_SET) {
-+ /* See if SLURM_RENICE env var is set by user */
-+ char val [1024];
-+
-+ if (spank_getenv (sp, PRIO_ENV_VAR, val, 1024)
-+ != ESPANK_SUCCESS)
-+ return (0);
-+
-+ if (_str2prio (val, &prio) < 0) {
-+ slurm_error ("Bad value for %s: %s",
-+ PRIO_ENV_VAR, optarg);
-+ return (\-1);
-+ }
-+
-+ if (prio < min_prio) {
-+ slurm_error ("%s=%d not allowed, using min=%d",
-+ PRIO_ENV_VAR, prio, min_prio);
-+ }
-+ }
-+
-+ if (prio < min_prio)
-+ prio = min_prio;
-+
-+ spank_get_item (sp, S_TASK_GLOBAL_ID, &taskid);
-+ spank_get_item (sp, S_TASK_PID, &pid);
-+
-+ slurm_info ("re\-nicing task%d pid %ld to %ld",
-+ taskid, pid, prio);
-+
-+ if (setpriority (PRIO_PROCESS, (int) pid,
-+ (int) prio) < 0) {
-+ slurm_error ("setpriority: %m");
-+ return (\-1);
-+ }
-+
-+ return (0);
-+}
-+
-+static int _str2prio (const char *str, int *p2int)
-+{
-+ long int l;
-+ char *p;
-+
-+ l = strtol (str, &p, 10);
-+ if ((*p != '\0') || (l < \-20) || (l > 20))
-+ return (\-1);
-+
-+ *p2int = (int) l;
-+
-+ return (0);
-+}
-+
-+static int _renice_opt_process (int val,
-+ const char *optarg,
-+ int remote)
-+{
-+ if (optarg == NULL) {
-+ slurm_error ("renice: invalid argument!");
-+ return (\-1);
-+ }
-+
-+ if (_str2prio (optarg, &prio) < 0) {
-+ slurm_error ("Bad value for \-\-renice: %s",
-+ optarg);
-+ return (\-1);
-+ }
-+
-+ if (prio < min_prio) {
-+ slurm_error ("\-\-renice=%d not allowed, will use min=%d",
-+ prio, min_prio);
-+ }
-+
-+ return (0);
-+}
-+
-+.fi
-+
-+.SH "COPYING"
-+Portions copyright (C) 2010-2018 SchedMD LLC.
-+Copyright (C) 2006 The Regents of the University of California.
-+Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
-+CODE\-OCEC\-09\-009. All rights reserved.
-+.LP
-+This file is part of Slurm, a resource management program.
-+For details, see <https://slurm.schedmd.com/>.
-+.LP
-+Slurm is free software; you can redistribute it and/or modify it under
-+the terms of the GNU General Public License as published by the Free
-+Software Foundation; either version 2 of the License, or (at your option)
-+any later version.
-+.LP
-+Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
-+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
-+details.
-+.SH "FILES"
-+\fB/etc/slurm/slurm.conf\fR \- Slurm configuration file.
-+.br
-+\fB/etc/slurm/plugstack.conf\fR \- SPANK configuration file.
-+.br
-+\fB/usr/include/slurm/spank.h\fR \- SPANK header file.
-+.SH "SEE ALSO"
-+.LP
-+\fBsrun\fR(1), \fBslurm.conf\fR(5)
+@@ -1,4 +1,4 @@
+-.TH SPANK "8" "Slurm Component" "April 2021" "Slurm Component"
++.TH SPANK "7" "Slurm Component" "April 2021" "Slurm Component"
+
+ .SH "NAME"
+ \fBSPANK\fR \- Slurm Plug\-in Architecture for Node and job (K)control
+diff --git a/doc/man/man8/Makefile.am b/doc/man/man8/Makefile.am
+index 8527b1c0..27b5777e 100644
--- a/doc/man/man8/Makefile.am
+++ b/doc/man/man8/Makefile.am
-@@ -4,8 +4,7 @@
+@@ -4,8 +4,7 @@ man8_MANS = slurmctld.8 \
slurmd.8 \
slurmdbd.8 \
slurmrestd.8 \
@@ -1554,7 +133,7 @@ Last-Update: 2020-11-05
if HAVE_MAN2HTML
-@@ -14,8 +13,7 @@
+@@ -14,8 +13,7 @@ html_DATA = \
slurmd.html \
slurmdbd.html \
slurmrestd.html \
diff --git a/debian/patches/series b/debian/patches/series
index 7f49a533..5d488d95 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -6,3 +6,4 @@ rpath
miscellanea-manpages
fix-typos
pmixv4
+fix-typos-acct_gather.conf.5
Reply to: