Commit e6a0d53f authored by Bram Daams's avatar Bram Daams
Browse files

JOB_GRACE env var in cron job to set grace time for the check

parent 22444131
Pipeline #37145 failed with stage
in 1 minute and 17 seconds
......@@ -47,7 +47,8 @@ The following data is used to configure a corresponding Healthchecks check:
- `JOB_TAGS`: use this environment variable in a job to specify tag names separated by a comma to specify additional tags
- `$USER`: the current user running the cron command is used to create a tag named `user=$USER`
- the job's schedule and the host's timezone are used to set the check's schedule
- when registering a new check, the execution time of the command is used to set an initial grace time. The grace time will be set to 1.2 times the execution time + 30 seconds. As per the Healthchecks API, the minimal grace time is 1 minute and the maximum grace time is 30 days.
- `JOB_GRACE`: use this environment variable in a job to set the grace time for the check. The value is an interval, interpreted as seconds unless a suffix is used. See "JOB_GRACE interval format" below for the valid interval formats.
- when registering a new check and JOB_GRACE is not set, the execution time of the command is used to set an initial grace time. The grace time will be set to 1.2 times the execution time + 30 seconds. As per the Healthchecks API, the minimal grace time is 1 minute and the maximum grace time is 30 days.
An example of a cron file that touches most of the functionality would look like:
```
......@@ -62,6 +63,23 @@ SHELL=/usr/loca/bin/sch
10 8-20/2 * mon-fri backup JOB_ID=db-backups JOB_TAGS=db,backup,my_project /usr/local/bin/run-db-backups
```
#### JOB_GRACE interval format
If no suffixes are used, seconds are assumed.
You can make use of the following suffixes to specify an interval:
| Suffix | Interval |
|--------|----------|
| s | seconds |
| m | minutes |
| h | hours |
| D | days |
| W | weeks |
| M | months (30 days each) |
| Y | years (365 days each) |
Although days, weeks, months and years are accepted, the resulting grace time is always limited to the range of 1 minute to 30 days, and you might want to limit the interval to several hours ;-)
### Job execution
`sch` takes over the role of the shell. Jobs not containing the `JOB_ID` environment variable are directly executed with `os.system`.
For `sch` managed jobs:
......
......@@ -18,6 +18,15 @@ HealthcheckCredentials = collections.namedtuple(
'api_url api_key'
)
# Number of seconds represented by each interval suffix, ordered from the
# largest unit down to the smallest.
INTERVAL_DICT = collections.OrderedDict((
    ("Y", 365 * 24 * 60 * 60),  # year, counted as 365 days
    ("M", 30 * 24 * 60 * 60),   # month, counted as 30 days
    ("W", 7 * 24 * 60 * 60),    # week
    ("D", 24 * 60 * 60),        # day
    ("h", 60 * 60),             # hour
    ("m", 60),                  # minute
    ("s", 1),                   # second
))
class Healthchecks:
"""
......@@ -83,16 +92,52 @@ class Healthchecks:
except requests.exceptions.HTTPError as err:
print(err)
@staticmethod
def human_to_seconds(string):
    """Convert an interval string like 1M, 1Y3M, 3W to seconds.

    A string made up of digits only is taken as a number of seconds.
    Otherwise the string must be a sequence of ``<number><suffix>``
    components, whose values are added up. An empty string yields 0.

    :type string: str
    :param string: Interval string like 90, 1M, 1W, 1M3W4h2s...
                   (s => seconds, m => minutes, h => hours, D => days,
                   W => weeks, M => months, Y => Years).

    :rtype: int
    :return: The conversion in seconds of string.
    :raises ValueError: If string is not a valid interval.
    """
    # Plain number of seconds. Only ASCII digits are accepted here:
    # str.isdigit() is also true for characters such as superscripts,
    # which int() is unable to convert.
    if re.match(r"[0-9]+\Z", string):
        return int(string)

    component = re.compile(
        r"(?P<value>[0-9]+)(?P<unit>[{0}])".format("".join(INTERVAL_DICT)))

    seconds = 0
    position = 0
    while position < len(string):
        match = component.match(string, position)
        if not match:
            raise ValueError("Bad interval format for {0}".format(string))
        # A zero-valued component ("0s", "1h0m") is valid and simply adds
        # nothing, in line with a bare "0" being accepted above.
        value = int(match.group("value"))
        seconds += value * INTERVAL_DICT[match.group("unit")]
        position = match.end()

    return seconds
@staticmethod
def get_job_tags(job):
    """
    Returns the tags specified in the environment variable
    JOB_TAGS in the cron job, with the separating commas replaced
    by spaces. Returns an empty string if no tags were found.
    """
    # the parsing of the job command is left to extract_env_var, so
    # there is a single place that knows how variables are matched
    tags = Healthchecks.extract_env_var(job.command, 'JOB_TAGS')
    if tags:
        return tags.replace(',', ' ')
    return ""
@staticmethod
......@@ -101,15 +146,29 @@ class Healthchecks:
Returns the value of environment variable JOB_ID if specified
in the cron job
"""
return Healthchecks.extract_job_id(job.command)
return Healthchecks.extract_env_var(job.command, 'JOB_ID')
@staticmethod
def get_job_grace(job):
    """
    Returns the grace time in seconds derived from the environment
    variable JOB_GRACE if specified in the cron job, adjusted by
    coerce_grace_time. Returns None if JOB_GRACE has no value.
    """
    raw_grace = Healthchecks.extract_env_var(job.command, 'JOB_GRACE')
    if not raw_grace:
        return None

    # interval string -> seconds -> value accepted by the hc api
    return Healthchecks.coerce_grace_time(
        Healthchecks.human_to_seconds(raw_grace)
    )
@staticmethod
def extract_job_id(command):
def extract_env_var(command, env_var):
"""
Returns the value of environment variable JOB_ID if specified
in the command
"""
regex = r".*JOB_ID=(\w*)"
regex = r".*{env_var}=([\w,]*)".format(env_var=env_var)
match = re.match(regex, command)
if match:
return match.group(1)
......@@ -167,21 +226,26 @@ class Healthchecks:
print("updating check")
# gather all the jobs' metadata
data = {
'schedule': job.slices.render(),
'desc': job.comment,
'grace': 3600,
'tz': tzlocal.get_localzone().zone,
'tags': 'sch host={host} job_id={job_id} user={user} '
'hash={hash} {tags}'.format(
'hash={hash} {job_tags}'.format(
host=socket.getfqdn(),
job_id=self.get_job_id(job),
user=os.environ['LOGNAME'],
hash=job_hash,
tags=self.get_job_tags(job)
job_tags=self.get_job_tags(job)
)
}
# grace time
grace = Healthchecks.get_job_grace(job)
if grace:
data['grace'] = grace
# post the data
try:
response = requests.post(
......@@ -208,20 +272,25 @@ class Healthchecks:
data = {
'name': self.get_job_id(job),
'schedule': job.slices.render(),
'desc': job.comment,
'grace': 3600,
'desc': job.comment,
'channels': '*', # all available notification channels
'tz': tzlocal.get_localzone().zone,
'tags': 'sch host={host} job_id={job_id} user={user} '
'hash={hash} {tags}'.format(
'hash={hash} {job_tags}'.format(
host=socket.getfqdn(),
job_id=self.get_job_id(job),
user=os.environ['LOGNAME'],
hash=job_hash,
tags=self.get_job_tags(job)
job_tags=self.get_job_tags(job)
)
}
# grace time
grace = Healthchecks.get_job_grace(job)
if grace:
data['grace'] = grace
# post the data
try:
response = requests.post(
......@@ -239,15 +308,23 @@ class Healthchecks:
# return check
return response.json()
@staticmethod
def coerce_grace_time(grace_time):
    """
    returns the adjusted grace_time so it is in spec with the grace time
    expected by the Healthchecks API
    """
    # make sure the grace time respects the hc api:
    # at least 1 minute (60 s) and at most 30 days (2592000 s)
    return min(max(60, grace_time), 2592000)
def set_grace_time(self, check, grace_time):
"""
set the grace time for a check
"""
data = {'grace': Healthchecks.coerce_grace_time(grace_time)}
# post the data
try:
......
......@@ -72,7 +72,7 @@ def run():
command = sys.argv[2]
# only handle the command when JOB_ID is in there
if not Healthchecks.extract_job_id(command):
if not Healthchecks.extract_env_var(command, 'JOB_ID'):
execute_shell_command(command)
sys.exit()
......@@ -86,6 +86,7 @@ def run():
# look for the escaped version of the command
escaped_command = command.replace('%', r'\%')
job = None
jobs = CronTabs().all.find_command(escaped_command)
for job in jobs:
check = health_checks.find_check(job)
......@@ -99,14 +100,16 @@ def run():
if not check:
sys.exit("Error: could not find or register check for given command")
# execute command
# ping start
health_checks.ping(check, '/start')
timer = TicToc()
timer.tic()
# execute command
exit_code = execute_shell_command(command)
timer.toc()
# ping end
......@@ -114,9 +117,14 @@ def run():
# ping success
health_checks.ping(check)
if is_new_check:
grace_time = round(1.2 * timer.elapsed + 30)
health_checks.set_grace_time(check, grace_time)
# set grace time from measurement if the check is
# - new
# - there's no JOB_GRACE set in the job command
if is_new_check and not health_checks.get_job_grace(job):
health_checks.set_grace_time(
check,
round(1.2 * timer.elapsed + 30)
)
else:
# ping failure
health_checks.ping(check, '/fail')
......
......@@ -10,7 +10,7 @@ box() {
}
# crude way to list cron commands in /etc/cron.d/test
grep -v ^# /etc/cron.d/test | grep . | sed 's/^.* root //' | sed 's/\ \#.*//' |
grep -v ^# /etc/cron.d/test | grep -v SHELL | grep . | sed 's/^.* root //' | sed 's/\ \#.*//' |
while read line
do
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment