aboutsummaryrefslogtreecommitdiffstats
path: root/sysutils/slurm-wlm/files/slurmctld.in
blob: 94f5429687d4a72384db4f29f5abd048a5c0aaa8 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
#!/bin/sh

# PROVIDE: slurmctld
# REQUIRE: DAEMON munge
# BEFORE: LOGIN
# KEYWORD: shutdown
#
# Add the following lines to /etc/rc.conf.local or /etc/rc.conf
# to enable this service:
#
# slurmctld_enable (bool):     Set to NO by default.
#                              Set it to YES to enable slurmctld.
#
# Common knobs (honoured by slurmctld and slurmd):
#   slurm_user (str):          User to run Slurm daemons as (default: slurm)
#   slurm_group (str):         Group to run Slurm daemons as (default: slurm)
#   slurm_conf (str):          Path to slurm.conf, exported as SLURM_CONF
#                              (default: %%ETCDIR%%/slurm.conf)
#   slurm_logdir (str):        Log directory (default: /var/log/slurm)
#   slurm_rundir (str):        Runtime directory (default: /var/run/slurm)
#
# Service-specific knobs:
#   slurmctld_flags (str):     Extra arguments passed to slurmctld.
#   slurmctld_pidfile (str):   PID file path
#                              (default: ${slurm_rundir}/slurmctld.pid)
#   slurmctld_logfile (str):   Log file path
#                              (default: ${slurm_logdir}/slurmctld.log)
#

. /etc/rc.subr

name="slurmctld"
rcvar="slurmctld_enable"

# Pull in rc.conf(5) settings before applying the defaults below, so that
# anything the administrator configured takes precedence.
load_rc_config "${name}"

# Common defaults (shared conceptual contract with slurmd)
: ${slurm_user:="slurm"}
: ${slurm_group:="slurm"}
: ${slurm_conf:="%%ETCDIR%%/slurm.conf"}
: ${slurm_logdir:="/var/log/slurm"}
: ${slurm_rundir:="/var/run/slurm"}

# Service defaults
: ${slurmctld_enable:="NO"}
: ${slurmctld_flags:=""}
: ${slurmctld_pidfile:="${slurm_rundir}/slurmctld.pid"}
: ${slurmctld_logfile:="${slurm_logdir}/slurmctld.log"}

pidfile="${slurmctld_pidfile}"

# slurmctld is started in the foreground (-D) under daemon(8), which detaches
# it, appends its output to the log file (-o) and records its PID.
#
# daemon(8) is given -p (child pidfile), not -P (supervisor pidfile): the PID
# stored in ${pidfile} must be slurmctld's own, because procname below names
# slurmctld and the reload/stop handlers signal the PID read from that file.
command="/usr/sbin/daemon"
procname="%%PREFIX%%/sbin/${name}"
command_args="-p ${pidfile} -o ${slurmctld_logfile} ${procname} -D ${slurmctld_flags}"

# rc.subr starts the service as "$command $rc_flags $command_args", taking
# rc_flags from ${name}_flags. The user's flags are already embedded in
# command_args after the slurmctld binary, so clear the variable; otherwise
# they would also be handed to daemon(8) as its own options.
slurmctld_flags=""

# "reload" is an extra verb; start gets a pre-hook, while reload, status and
# stop are replaced by the handlers defined in this script.
extra_commands="reload"
start_precmd="${name}_prestart"
reload_cmd="${name}_reload"
status_cmd="${name}_status"
stop_cmd="${name}_stop"

# start_precmd hook: prepare the directories used by the service and make
# sure SLURM_CONF is present in the environment.
# Returns 1 as soon as either directory cannot be created, 0 otherwise.
slurmctld_prestart()
{
	# Log directory: owned by the Slurm user/group, mode 0750.
	if ! /usr/bin/install -d -o "${slurm_user}" -g "${slurm_group}" -m 0750 "${slurm_logdir}"; then
		return 1
	fi

	# Runtime (pidfile) directory: owned by root:wheel, mode 0755.
	if ! /usr/bin/install -d -o "root" -g "wheel" -m 0755 "${slurm_rundir}"; then
		return 1
	fi

	# A non-empty SLURM_CONF already set by the caller is left alone;
	# otherwise export the configured slurm.conf path.
	[ -n "${SLURM_CONF}" ] || export SLURM_CONF="${slurm_conf}"
}

# reload_cmd handler: ask the running slurmctld to re-read its configuration
# by sending it SIGHUP.
# Returns 1 (without signalling anything) when the pidfile is missing or does
# not identify a live slurmctld; otherwise returns the status of kill.
slurmctld_reload()
{
	local _pid

	if [ ! -r "${pidfile}" ]; then
		echo "${name} not running? (pidfile not found)"
		return 1
	fi

	# check_pidfile prints the PID when the pidfile names a live process
	# matching procname and prints nothing otherwise; its exit status is
	# not an indicator, so the output is what gets tested. This keeps an
	# empty or stale pidfile from turning into a SIGHUP for the wrong
	# process.
	_pid="$(check_pidfile "${pidfile}" "${procname}")"
	if [ -z "${_pid}" ]; then
		echo "${name} not running? (stale pidfile: ${pidfile})."
		return 1
	fi

	echo "Reloading ${name} configuration."
	kill -HUP "${_pid}"
}

# status_cmd handler: report whether the service is running.
# Prints one status line; returns 0 when running, 1 when the pidfile is
# missing or stale.
slurmctld_status()
{
	local _pid

	if [ ! -r "${pidfile}" ]; then
		echo "${name} is not running (no pidfile)."
		return 1
	fi

	# check_pidfile signals a match by printing the PID (and prints nothing
	# otherwise); its exit status is not an indicator, so capture and test
	# the output instead of using it directly as a condition.
	_pid="$(check_pidfile "${pidfile}" "${procname}")"
	if [ -z "${_pid}" ]; then
		# Depending on which pidfile option daemon(8) was started with,
		# the recorded PID may be the wrapper's rather than slurmctld's;
		# accept that as "running" too.
		_pid="$(check_pidfile "${pidfile}" "${command}")"
	fi

	if [ -z "${_pid}" ]; then
		echo "${name} is not running (stale pidfile)."
		return 1
	fi
	echo "${name} is running as pid ${_pid}."
}

# stop_cmd handler: stop the service gracefully, escalating if needed.
# Sends SIGTERM, waits up to 5 seconds for the process to disappear, and
# only then sends SIGKILL to its children and to the process itself.
# Returns 1 when nothing was running (missing, empty or stale pidfile; the
# latter two are removed), 0 once the process has been stopped.
slurmctld_stop()
{
	local _pid _waited

	if [ ! -r "${pidfile}" ]; then
		echo "${name} not running? (no pidfile)."
		return 1
	fi

	# check_pidfile signals a match by printing the PID (and prints nothing
	# otherwise); its exit status is not an indicator, so capture and test
	# the output. Depending on which pidfile option daemon(8) was started
	# with, the PID may be slurmctld's or the wrapper's; accept either.
	_pid="$(check_pidfile "${pidfile}" "${procname}")"
	if [ -z "${_pid}" ]; then
		_pid="$(check_pidfile "${pidfile}" "${command}")"
	fi

	if [ -z "${_pid}" ]; then
		if [ -z "$(cat "${pidfile}" 2>/dev/null)" ]; then
			echo "${name} not running? (empty pidfile: ${pidfile})."
		else
			echo "${name} not running? (stale pidfile: ${pidfile})."
		fi
		rm -f "${pidfile}"
		return 1
	fi

	echo "Stopping ${name} (pid ${_pid})."
	# Best effort: the process may exit between the check and the signal.
	kill -TERM "${_pid}" 2>/dev/null || true

	# Allow a short grace period for clean shutdown and helper cleanup.
	# Liveness is polled with kill -0 on the already-verified PID, and is
	# re-checked after the final sleep so a process that exited within the
	# grace period is never sent SIGKILL.
	_waited=0
	while kill -0 "${_pid}" 2>/dev/null; do
		if [ "${_waited}" -ge 5 ]; then
			echo "${name} did not exit on SIGTERM; sending SIGKILL to controller and its helpers."
			# Kill children (e.g. slurmscriptd) first, then the controller itself.
			pkill -KILL -P "${_pid}" 2>/dev/null || true
			kill -KILL "${_pid}" 2>/dev/null || true
			break
		fi
		sleep 1
		_waited=$((_waited + 1))
	done

	rm -f "${pidfile}"
}

# Dispatch the action given as the script's first argument through rc.subr,
# using the variables and handler functions configured above.
run_rc_command "$1"