diff --git a/README.md b/README.md
index b85195d896e64c4636f41a70b2ea16046eeee698..bdbaba27d50313a80463c23b95e51b22af06beac 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,4 @@
+[PACKAGE]: ../../raw/master/mkp/check_ntp-0.0.3-20230607.mkp "check_ntp-0.0.3-20230607.mkp"
 # Active Check NTP
 
 This plugin is an active check to monitor NTP servers.
diff --git a/mkp/check_ntp-0.0.3-20230607.mkp b/mkp/check_ntp-0.0.3-20230607.mkp
new file mode 100644
index 0000000000000000000000000000000000000000..bc502ffec70c145a38158d4507026770eb3d1fd1
Binary files /dev/null and b/mkp/check_ntp-0.0.3-20230607.mkp differ
diff --git a/source/checks/check_ntp b/source/checks/check_ntp
new file mode 100644
index 0000000000000000000000000000000000000000..620b90235d3920023148c377076cb6157edd7936
--- /dev/null
+++ b/source/checks/check_ntp
@@ -0,0 +1,72 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# License: GNU General Public License v2
+#
+#
+# Author: thl-cmk[at]outlook[dot]com
+# URL : https://thl-cmk.hopto.org
+# Date : 2022-10-04
+# File : checks/active_checks_ntp
+#
+# 2022-11-15: changed to use short options, doesn't work with long options
+
+def check_ntp_arguments(params):
+    """Translate the rule parameters into arguments for the check_ntp plugin.
+
+    Every option is emitted as a single '-x value' string using the short
+    option name. Level tuples are rendered as 'WARN,CRIT'. Without a
+    configured server the $HOSTADDRESS$ macro is used as the target.
+    """
+    args = []
+
+    if 'port' in params:
+        args.append(f'-p {params["port"]}')
+
+    if 'timeout' in params:
+        args.append(f'-t {params["timeout"]}')
+
+    if 'server' in params:
+        args.append(f'-H {params["server"]}')
+    else:
+        args.append('-H $HOSTADDRESS$')
+
+    if 'version' in params:
+        args.append(f'-V {params["version"]}')
+
+    if 'offset_levels' in params:
+        args.append(f'-o {params["offset_levels"][0]},{params["offset_levels"][1]}')
+
+    if 'stratum_levels' in params:
+        args.append(f'-s {params["stratum_levels"][0]},{params["stratum_levels"][1]}')
+
+    if 'dispersion_levels' in params:
+        args.append(f'-D {params["dispersion_levels"][0]},{params["dispersion_levels"][1]}')
+
+    if 'delay_levels' in params:
+        # delay has its own levels, the dispersion levels must not be used here
+        args.append(f'-d {params["delay_levels"][0]},{params["delay_levels"][1]}')
+
+    if 'state_not_synchronized' in params:
+        args.append(f'-n {params["state_not_synchronized"]}')
+
+    if 'state_no_response' in params:
+        args.append(f'-r {params["state_no_response"]}')
+
+    return args
+
+
+def _check_description(params):
+    """Return the service description: 'NTP server' plus the optional description."""
+    if 'description' in params:
+        return f'NTP server {params["description"]}'
+
+    return 'NTP server'
+
+
+active_check_info['ntp'] = {
+    'command_line': 'check_ntp $ARG1$',
+    'argument_function': check_ntp_arguments,
+    'service_description': _check_description,
+    'has_perfdata': True,
+}
diff --git a/source/gui/metrics/check_ntp.py b/source/gui/metrics/check_ntp.py
new file mode 100644
index 0000000000000000000000000000000000000000..3456514550f450e253cd2a3f848f30dfa9a3d8ea
--- /dev/null
+++ b/source/gui/metrics/check_ntp.py
@@ -0,0 +1,88 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# License: GNU General Public License v2
+#
+# Author: thl-cmk[at]outlook[dot]com
+# URL : https://thl-cmk.hopto.org
+# Date : 2022-10-06
+# File : metrics/check_ntp.py
+#
+#
+
+from cmk.gui.i18n import _
+
+from cmk.gui.plugins.metrics.utils import (
+    metric_info,
+    graph_info,
+    perfometer_info
+)
+
+metric_info['ntp_offset'] = {
+    'title': _('Offset'),
+    'unit': 's',
+    'color': '#9a52bf',
+}
+
+
+metric_info['ntp_delay'] = {
+    'title': _('Delay'),
+    'help': _(''),
+    'unit': 's',
+    'color': '26/a',
+}
+
+metric_info['ntp_root_dispersion'] = {
+    'title': _('Root dispersion'),
+    'help': _(''),
+    'unit': 's',
+    'color': '32/a',
+}
+
+
+graph_info['check_ntp_offset'] = {
+    'title': _('Time offset'),
+    'metrics': [
+        ('ntp_offset', 'area'),
+    ],
+    'scalars': [
+        ('ntp_offset:crit', _('Upper critical level')),
+        ('ntp_offset:warn', _('Upper warning level')),
+        ('0,ntp_offset:warn,-', _('Lower warning level')),
+        ('0,ntp_offset:crit,-', _('Lower critical level')),
+    ],
+    'range': ('0,ntp_offset:crit,-', 'ntp_offset:crit'),
+}
+
+graph_info['check_ntp_delay'] = {
+    'title': _('Delay'),
+    'metrics': [
+        ('ntp_delay', 'area'),
+    ],
+    'scalars': [
+        ('ntp_delay:crit', _('Critical')),
+        ('ntp_delay:warn', _('Warning')),
+    ],
+    'range': (0, 'ntp_delay:max'),
+}
+
+
+graph_info['check_ntp_dispersion'] = {
+    'title': _('Root dispersion'),
+    'metrics': [
+        ('ntp_root_dispersion', 'area'),
+    ],
+    'scalars': [
+        ('ntp_root_dispersion:crit', _('Critical')),
+        ('ntp_root_dispersion:warn', _('Warning')),
+    ],
+    'range': (0, 'ntp_root_dispersion:max'),
+}
+
+
+perfometer_info.append({
+    'type': 'logarithmic',
+    'metric': 'ntp_offset',
+    'half_value': 1.0,
+    'exponent': 10.0,
+})
diff --git a/source/gui/wato/check_ntp.py b/source/gui/wato/check_ntp.py
new file mode 100644
index 0000000000000000000000000000000000000000..451a293db59736f8aaf9a78e683fa849c02f13da
--- /dev/null
+++ b/source/gui/wato/check_ntp.py
@@ -0,0 +1,185 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# License: GNU General Public License v2
+#
+#
+# Author: thl-cmk[at]outlook[dot]com
+# URL : https://thl-cmk.hopto.org
+# Date : 2022-10-04
+# File : wato/active_checks_ntp.py
+#
+
+from cmk.gui.i18n import _
+from cmk.gui.valuespec import (
+    Dictionary,
+    Tuple,
+    Transform,
+    Integer,
+    TextAscii,
+    MonitoringState,
+)
+
+from cmk.gui.plugins.wato.utils import (
+    rulespec_registry,
+    HostRulespec,
+)
+
+from cmk.gui.plugins.wato.active_checks import (
+    RulespecGroupActiveChecks
+)
+
+
+def _valuespec_active_checks_ntp():
+    """Return the valuespec of the 'Check NTP service' rule, all elements are optional keys."""
+    return Transform(
+        Dictionary(
+            title=_('Check NTP service'),
+            help=_(''),
+            elements=[
+                ('description',
+                 TextAscii(
+                     title=_('Service description'),
+                     help=_(
+                         'Must be unique for every host. The service description starts always with \"NTP server\".'),
+                     size=50,
+                 )),
+                ('server',
+                 TextAscii(
+                     title=_('Server IP-address or name'),
+                     help=_(
+                         'Hostname or IP-address to monitor. Default is the host name/IP-Address of the monitored host.'
+                     ),
+                     size=50,
+                 )),
+                ('port',
+                 Integer(
+                     title=_('NTP port'),
+                     help=_('UDP Port to use. Default is 123.'),
+                     # size=5,
+                     default_value=123,
+                     minvalue=1,
+                     maxvalue=65535,
+                 )),
+                ('version',
+                 Integer(
+                     title=_('NTP version'),
+                     help=_('NTP version for the request. Default is version 4.'),
+                     # size=1,
+                     default_value=4,
+                     minvalue=1,
+                     maxvalue=4,
+                 )),
+                ('timeout',
+                 Integer(
+                     title=_('Request timeout'),
+                     help=_('Timeout for the request in seconds. Min: 1s, Max: 20, Default is 2 seconds.'),
+                     # size=3,
+                     default_value=2,
+                     minvalue=1,
+                     maxvalue=20,
+                 )),
+                ('state_not_synchronized',
+                 MonitoringState(
+                     title=_('Monitoring state if server is not synchronized'),
+                     # the help text has to match default_value=2
+                     help=_('Monitoring state if server is not synchronized. Default is "CRIT"'),
+                     default_value=2,
+                 )),
+                ('state_no_response',
+                 MonitoringState(
+                     default_value=2,
+                     title=_('Monitoring state if server doesn\'t respond (timeout)'),
+                     help=_('Monitoring state if the server doesn\'t respond. Default is "CRIT"')
+                 )),
+                ('stratum_levels',
+                 Tuple(
+                     title=_('max. stratum'),
+                     elements=[
+                         Integer(
+                             title=_('Warning at'),
+                             default_value=10,
+                             maxvalue=255,
+                             minvalue=1,
+                             help=_(
+                                 'The stratum (\'distance\' to the reference clock) at which the check gets warning.'),
+                         ),
+                         Integer(
+                             title=_('Critical at'),
+                             default_value=15,
+                             maxvalue=18,
+                             help=_(
+                                 'The stratum (\'distance\' to the reference clock) at which the check gets critical.'),
+                         )
+                     ],
+                 )),
+                ('offset_levels',
+                 Tuple(
+                     title=_('max. offset in ms'),
+                     help=_('Mean offset in the times reported between this local host and the remote peer or server. '
+                            'Note: These levels will also be used as lower levels.'),
+                     elements=[
+                         Integer(
+                             title=_('Warning at'),
+                             unit='ms',
+                             default_value=200,
+                             help=_('The offset in ms at which a warning state is triggered. Default is 200ms'),
+                         ),
+                         Integer(
+                             title=_('Critical at'),
+                             unit='ms',
+                             default_value=500,
+                             help=_('The offset in ms at which a critical state is triggered. Default is 500ms'),
+                         )
+                     ],
+                 )),
+                ('delay_levels',
+                 Tuple(
+                     title=_('max. delay in ms'),
+                     help=_('Upper levels for delay in milliseconds.'),
+                     elements=[
+                         Integer(
+                             title=_('Warning at'),
+                             unit='ms',
+                             default_value=200,
+                             help=_('The delay in ms at which a warning state is triggered. Default is 200ms'),
+                         ),
+                         Integer(
+                             title=_('Critical at'),
+                             unit='ms',
+                             default_value=500,
+                             help=_('The delay in ms at which a critical state is triggered. Default is 500ms'),
+                         )
+                     ],
+                 )),
+                ('dispersion_levels',
+                 Tuple(
+                     title=_('max. root dispersion in s'),
+                     help=_('Upper levels for (root) dispersion in seconds.'),
+                     elements=[
+                         Integer(
+                             title=_('Warning at'),
+                             unit='s',
+                             default_value=3,
+                             help=_('The dispersion in s at which a warning state is triggered. Default is 3s'),
+                         ),
+                         Integer(
+                             title=_('Critical at'),
+                             unit='s',
+                             default_value=5,
+                             help=_('The dispersion in s at which a critical state is triggered. Default is 5s'),
+                         )
+                     ],
+                 )),
+            ],
+        ),
+    )
+
+
+rulespec_registry.register(
+    HostRulespec(
+        group=RulespecGroupActiveChecks,
+        match_type='all',
+        name='active_checks:ntp',
+        valuespec=_valuespec_active_checks_ntp,
+    )
+)
diff --git a/source/lib/nagios/plugins/check_ntp b/source/lib/nagios/plugins/check_ntp
new file mode 100755
index 0000000000000000000000000000000000000000..f270473b5fb7873bb996ddd51aab0aa69cbde28d
--- /dev/null
+++ b/source/lib/nagios/plugins/check_ntp
@@ -0,0 +1,297 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# License: GNU General Public License v2
+#
+#
+# Author: thl-cmk[at]outlook[dot]com
+# URL : https://thl-cmk.hopto.org
+# Date : 2022-10-04
+# File : active_checks_ntp.py
+#
+# Active check to monitor NTP servers.
+#
+# 2022-10-13: added exception handling for ntp request
+# 2022-11-14: made state on no response configurable
+# 2022-11-15: added short options
+# 2023-06-07: moved gui files to ~/local/lib/check_mk/gui/plugins/...
+
+from typing import Optional, Sequence, Tuple
+from ipaddress import IPv4Address
+import sys
+import argparse
+import socket
+from time import ctime
+
+# ntplib is only imported inside the try block, main() reports if it is missing
+no_ntplib = False
+try:
+    import ntplib
+    from ntplib import NTPClient, NTPStats
+except ModuleNotFoundError:
+    no_ntplib = True
+
+_ntp_leap = {
+    0: 'no warning',
+    1: 'last minute of the day has 61 seconds',
+    2: 'last minute of the day has 59 seconds',
+    3: 'unknown(clock unsynchronized)',
+}
+
+_ntp_mode = {
+    0: 'reserved',
+    1: 'symmetric active',
+    2: 'symmetric passive',
+    3: 'client',
+    4: 'server',
+    5: 'broadcast',
+    6: 'NTP control message',
+    7: 'reserved for private use',
+}
+
+_ntp_ref_id = {
+    # from RFC5905
+    'GOES': 'Geosynchronous Orbit Environment Satellite',
+    'GPS': 'Global Position System',
+    'GAL': 'Galileo Positioning System',
+    'PPS': 'Generic pulse - per - second',
+    'IRIG': 'Inter - Range Instrumentation Group',
+    'WWVB': 'LF Radio WWVB Ft.Collins, CO 60 kHz',
+    'DCF': 'LF Radio DCF77 Mainflingen, DE 77.5 kHz',
+    'HBG': 'LF Radio HBG Prangins, HB 75 kHz',
+    'MSF': 'LF Radio MSF Anthorn, UK 60 kHz',
+    'JJY': 'LF Radio JJY Fukushima, JP 40 kHz, Saga, JP 60 kHz',
+    'LORC': 'MF Radio LORAN C station, 100 kHz',
+    'TDF': 'MF Radio Allouis, FR 162 kHz',
+    'CHU': 'HF Radio CHU Ottawa, Ontario',
+    'WWV': 'HF Radio WWV Ft.Collins, CO',
+    'WWVH': 'HF Radio WWVH Kauai, HI',
+    'NIST': 'NIST telephone modem',
+    'ACTS': 'NIST telephone modem',
+    'USNO': 'USNO telephone modem',
+    'PTB': 'European telephone modem',
+    # from meienberg
+    # 'PPS': '"Pulse Per Second" from a time standard',
+    # 'IRIG': 'Inter-Range Instrumentation Group time code',
+    # 'ACTS': 'American NIST time standard telephone modem',
+    # 'NIST': 'American NIST time standard telephone modem',
+    # 'PTB': 'German PTB time standard telephone modem',
+    # 'USNO': 'American USNO time standard telephone modem',
+    # 'CHU': 'CHU (HF, Ottawa, ON, Canada) time standard radio receiver',
+    'DCFa': 'DCF77 (LF, Mainflingen, Germany) time standard radio receiver',
+    # 'HBG': 'HBG (LF Prangins, Switzerland) time standard radio receiver',
+    # 'JJY': 'JJY (LF Fukushima, Japan) time standard radio receiver',
+    # 'LORC': 'LORAN-C station (MF) time standard radio receiver. Note, no longer operational (superseded by eLORAN)',
+    # 'MSF': 'MSF (LF, Anthorn, Great Britain) time standard radio receiver',
+    # 'TDF': 'TDF (MF, Allouis, France) time standard radio receiver',
+    # 'WWV': 'WWV (HF, Ft. Collins, CO, America) time standard radio receiver',
+    # 'WWVB': 'WWVB (LF, Ft. Collins, CO, America) time standard radio receiver',
+    # 'WWVH': 'WWVH (HF, Kauai, HI, America) time standard radio receiver',
+    # 'GOES': 'American Geosynchronous Orbit Environment Satellite',
+    # 'GPS': 'American GPS',
+    # 'GAL': 'Galileo European GNSS',
+    'ACST': 'manycast server',
+    'AUTO': 'Autokey sequence error',
+    'BCST': 'broadcast server',
+    'MCST': 'multicast server',
+}
+
+_ntp_refids_bad = {
+    'AUTH': 'authentication error',
+    'AUTO': 'Autokey sequence error',
+    'CRYPT': 'Autokey protocol error',
+    'DENY': 'Access denied by server',
+    'INIT': 'Association initialized',
+    'RATE': 'Polling rate exceeded',
+    'LOCL': 'This local host (a place marker at the lowest stratum included in case '
+            'there are no remote peers or servers available)',
+    'STEP': 'Step time change, the offset is less than the panic threshold (1000ms) '
+            'but greater than the step threshold (125ms).',
+    'TIME': 'Association timeout',
+    'XFAC': 'Association changed (IP address changed or lost)',
+}
+
+
+def _ntp_decode_ref_id(stratum: int, ref_id: int):
+    """Decode the reference ID of an NTP response.
+
+    Returns an IPv4Address for stratum 2-15, the text built from the bytes
+    above 31 for stratum 0/1 (like 'GPS') and None for any other stratum.
+    """
+    if 1 < stratum < 16:
+        return IPv4Address(ref_id)
+
+    if stratum in [0, 1]:
+        return ''.join(chr(_byte) for _byte in ref_id.to_bytes(4, 'big') if _byte > 31)
+
+    return None
+
+
+def parse_arguments(argv: Sequence[str]) -> argparse.Namespace:
+    """Parse the command line, level options take 'WARN,CRIT' as value."""
+
+    def _warn_crit(arg: str) -> Tuple[int, int]:
+        """Convert 'WARN,CRIT' (optionally in brackets) to a tuple of ints."""
+        try:
+            warn, crit = arg.strip('(').strip(')').split(',')
+            return int(warn), int(crit)
+        except ValueError as e:
+            raise argparse.ArgumentTypeError(e) from e
+
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+        epilog='Add WARN,CRIT levels separated by comma without brackets, like this: "--offset 200,500". '
+               'To use this check plugin you need to install the python "ntplib" in your CMK python environment.'
+    )
+    parser.add_argument(
+        '-H', '--host', required=True,
+        help='Host to query (required)')
+    parser.add_argument(
+        '-p', '--port', type=int, default=123,
+        help='UDP port to use.')
+    parser.add_argument(
+        '-t', '--timeout', type=int, default=2,
+        help='Request timeout in seconds.')
+    parser.add_argument(
+        '-V', '--version', type=int, default=4, choices=[1, 2, 3, 4],
+        help='NTP version to use.')
+    parser.add_argument(
+        '-n', '--state_not_synchronized', type=int, default=2, choices=[0, 1, 2, 3],
+        help='Monitoring state if not synchronized.')
+    parser.add_argument(
+        '-r', '--state_no_response', type=int, default=2, choices=[0, 1, 2, 3],
+        help='Monitoring state if no response (timeout) is received.')
+    parser.add_argument(
+        '-s', '--stratum', type=_warn_crit, default=(10, 15),
+        help='WARN,CRIT levels for stratum. Use values > 16 to disable.')
+    parser.add_argument(
+        '-o', '--offset', type=_warn_crit, default=(200, 500),
+        help='WARN,CRIT levels for offset in milliseconds.')
+    parser.add_argument(
+        '-d', '--delay', type=_warn_crit, default=(200, 500),
+        help='WARN,CRIT levels for delay in milliseconds.')
+    # dispersion is given in seconds, so the default is not 200,500 as for the ms levels
+    parser.add_argument(
+        '-D', '--dispersion', type=_warn_crit, default=(3, 5),
+        help='WARN,CRIT levels for dispersion in seconds.')
+
+    args = parser.parse_args(argv)
+    args.host = args.host.strip(' ')
+    return args
+
+
+def get_ntp_time(server: str, port: int, timeout: int, version: int, state_no_response: int):  # -> Optional[NTPStats]
+    """Query the NTP server and return the response.
+
+    If the request raises NTPException or socket.gaierror, the error is
+    printed and the plugin exits with state_no_response.
+    """
+    # NTPStats is not available if ntplib is not installed
+    c = NTPClient()
+    try:
+        response = c.request(
+            host=server,
+            port=port,
+            timeout=timeout,
+            version=version
+        )
+    except (ntplib.NTPException, socket.gaierror) as e:
+        sys.stdout.write(f'{e}\n')
+        sys.exit(state_no_response)
+    return response
+
+
+def main(args=None):
+    """Run the check, print the plugin output and return the monitoring state.
+
+    Exits with 3 if ntplib is missing, returns state_not_synchronized for stratum 0.
+    """
+    if args is None:
+        args = sys.argv[1:]  # without the path/plugin it self
+
+    args = parse_arguments(args)
+
+    if no_ntplib:
+        sys.stdout.write('To use this check plugin you need to install the python ntplib in your CMK python environment.\n')
+        sys.exit(3)
+
+    ntp_time = get_ntp_time(args.host, args.port, args.timeout, args.version, args.state_no_response)
+
+    server_time = ctime(ntp_time.tx_time)
+    stratum = int(ntp_time.stratum)
+
+    if stratum == 0:
+        sys.stdout.write('Server not synchronized. Stratum: 0\n')
+        return args.state_not_synchronized
+
+    ref_id = _ntp_decode_ref_id(stratum, int(ntp_time.ref_id))
+
+    info_text = ''
+    long_output = ''
+    perfdata = ''
+    status = 0
+
+    text = f'Stratum: {stratum}'
+    if stratum >= args.stratum[1]:
+        status = 2
+        text += '(!!)'
+    elif stratum >= args.stratum[0]:
+        status = max(status, 1)
+        text += '(!)'
+
+    info_text += f'{text}, Reference ID: {ref_id}, Time: {server_time}'
+    long_output += f'{text}\n'
+
+    long_output += f'Ref-ID: {ref_id}, {_ntp_ref_id.get(ref_id, "")}\n'
+    long_output += f'Time: {server_time}\n'
+    long_output += f'Mode: {_ntp_mode.get(ntp_time.mode, f"unknown: {ntp_time.mode}")}\n'
+    long_output += f'Version: {ntp_time.version}\n'
+    long_output += f'Poll: {ntp_time.poll}\n'
+    long_output += f'Precision: {ntp_time.precision}\n'
+    long_output += f'Leap: {_ntp_leap.get(ntp_time.leap, f"unknown {ntp_time.leap}")}\n'
+
+    long_output += '\nPerfdata\n'
+    for value, warn, crit, label, metric, unit in [
+        (ntp_time.offset, args.offset[0] / 1000, args.offset[1] / 1000, 'Offset', 'ntp_offset', 's'),
+        (ntp_time.delay, args.delay[0] / 1000, args.delay[1] / 1000, 'Delay', 'ntp_delay', 's'),
+        (ntp_time.root_dispersion, args.dispersion[0], args.dispersion[1], 'Root dispersion', 'ntp_root_dispersion', 's')
+    ]:
+        perfdata += f'{metric}={value};{warn};{crit}; '
+        text = f'{label}: {value:.4f} {unit}'
+        if (crit * - 1) > value or value >= crit:  # use crit as lower and upper level
+            status = 2
+            info_text += f', {text}(!!)'
+            long_output += f'{text}(!!)\n'
+        elif (warn * -1) > value or value >= warn:  # use warn as lower and upper level
+            status = max(status, 1)
+            info_text += f', {text}(!)'
+            long_output += f'{text}(!)\n'
+        else:
+            long_output += f'{text}\n'
+
+    long_output += '\nTimestamps:\n'
+    long_output += f'Reference Timestamp (ref): {ntp_time.ref_timestamp}\n'
+    long_output += f'Origin Timestamp (org): {ntp_time.orig_timestamp}\n'
+    long_output += f'Receive Timestamp (rec): {ntp_time.recv_timestamp}\n'
+    long_output += f'Transmit Timestamp (xmt): {ntp_time.tx_timestamp}\n'
+    long_output += f'Destination Timestamp (dst): {ntp_time.dest_timestamp}\n'
+
+    long_output += '\nTimes\n'
+    for label, value in [
+        ('Reference', ntp_time.ref_time),
+        ('Origin', ntp_time.orig_time),
+        ('Receive', ntp_time.recv_time),
+        ('Transmit', ntp_time.tx_time),
+        ('Destination', ntp_time.dest_time),
+    ]:
+        long_output += f' {label} time: {value}\n'
+
+    info_text = info_text.strip(',').strip(' ')
+    sys.stdout.write(f'{info_text}\n{long_output} | {perfdata}\n')
+
+    return status
+
+
+if __name__ == '__main__':
+    exitcode = main()
+    sys.exit(exitcode)
diff --git a/source/packages/check_ntp b/source/packages/check_ntp
new file mode 100644
index 0000000000000000000000000000000000000000..a19b5ffe1172dde28a633f4789d75e4ba8598360
--- /dev/null
+++ b/source/packages/check_ntp
@@ -0,0 +1,12 @@
+{'author': 'Th.L. (thl-cmk[at]outlook[dot]com)',
+ 'description': 'Active check to monitor NTP servers\n',
+ 'download_url': 'https://thl-cmk.hopto.org',
+ 'files': {'checks': ['check_ntp'],
+           'gui': ['metrics/check_ntp.py', 'wato/check_ntp.py'],
+           'lib': ['nagios/plugins/check_ntp']},
+ 'name': 'check_ntp',
+ 'title': 'Active check NTP',
+ 'version': '0.0.3-20230607',
+ 'version.min_required': '2.1.0b1',
+ 'version.packaged': '2.2.0p14',
+ 'version.usable_until': '2.2.0b1'}