diff options
author | Thomas Guyot-Sionnest <dermoth@users.sourceforge.net> | 2007-12-04 12:38:56 +0000 |
---|---|---|
committer | Thomas Guyot-Sionnest <dermoth@users.sourceforge.net> | 2007-12-04 12:38:56 +0000 |
commit | 0d34c4da9decb76ea32b73c68497a6b83a791bd1 (patch) | |
tree | f6c6583fa76f6740afc733eb1cd379c0f3b5f1e3 /plugins | |
parent | ee6c34108031a217a5e92df50a1cb6544ff07cf9 (diff) | |
download | monitoring-plugins-0d34c4da9decb76ea32b73c68497a6b83a791bd1.tar.gz |
Merge changes from branches/dermoth_ntp_rework (check_ntp_peer/check_ntp_time)
NEWS | 13
plugins/Makefile.am | 8
plugins/check_ntp_peer.c | 628 ++++++++++++++++-------------------------------
plugins/check_ntp_time.c | 323 ++----------------------
plugins/t/check_ntp.t | 92 +++++-
git-svn-id: https://nagiosplug.svn.sourceforge.net/svnroot/nagiosplug/nagiosplug/trunk@1846 f882894a-f735-0410-b71e-b25c423dba1c
Diffstat (limited to 'plugins')
-rw-r--r-- | plugins/Makefile.am | 8 | ||||
-rw-r--r-- | plugins/check_ntp_peer.c | 628 | ||||
-rw-r--r-- | plugins/check_ntp_time.c | 323 | ||||
-rw-r--r-- | plugins/t/check_ntp.t | 92 |
4 files changed, 334 insertions, 717 deletions
diff --git a/plugins/Makefile.am b/plugins/Makefile.am index c0486bc1..eafcc5cd 100644 --- a/plugins/Makefile.am +++ b/plugins/Makefile.am @@ -14,8 +14,8 @@ MATHLIBS = @MATHLIBS@ #AM_CFLAGS = -Wall libexec_PROGRAMS = check_apt check_cluster check_disk check_dummy check_http check_load \ - check_mrtg check_mrtgtraf check_ntp check_nwstat check_overcr check_ping \ - check_real check_smtp check_ssh check_tcp check_time \ + check_mrtg check_mrtgtraf check_ntp check_ntp_peer check_nwstat check_overcr check_ping \ + check_real check_smtp check_ssh check_tcp check_time check_ntp_time \ check_ups check_users negate \ urlize @EXTRAS@ @@ -70,6 +70,7 @@ check_mysql_query_LDADD = $(NETLIBS) $(MYSQLLIBS) check_nagios_LDADD = $(BASEOBJS) runcmd.o check_nt_LDADD = $(NETLIBS) check_ntp_LDADD = $(NETLIBS) $(MATHLIBS) +check_ntp_peer_LDADD = $(NETLIBS) $(MATHLIBS) check_nwstat_LDADD = $(NETLIBS) check_overcr_LDADD = $(NETLIBS) check_pgsql_LDADD = $(NETLIBS) $(PGLIBS) @@ -83,6 +84,7 @@ check_ssh_LDADD = $(NETLIBS) check_swap_LDADD = $(MATHLIBS) $(BASEOBJS) popen.o check_tcp_LDADD = $(SSLOBJS) $(NETLIBS) $(SSLLIBS) check_time_LDADD = $(NETLIBS) +check_ntp_time_LDADD = $(NETLIBS) $(MATHLIBS) check_ups_LDADD = $(NETLIBS) check_users_LDADD = $(BASEOBJS) popen.o check_by_ssh_LDADD = $(NETLIBS) runcmd.o @@ -110,6 +112,7 @@ check_mysql_query_DEPENDENCIES = check_mysql_query.c $(NETOBJS) $(DEPLIBS) check_nagios_DEPENDENCIES = check_nagios.c $(BASEOBJS) runcmd.o $(DEPLIBS) check_nt_DEPENDENCIES = check_nt.c $(NETOBJS) $(DEPLIBS) check_ntp_DEPENDENCIES = check_ntp.c $(NETOBJS) $(DEPLIBS) +check_ntp_peer_DEPENDENCIES = check_ntp_peer.c $(NETOBJS) $(DEPLIBS) check_nwstat_DEPENDENCIES = check_nwstat.c $(NETOBJS) $(DEPLIBS) check_overcr_DEPENDENCIES = check_overcr.c $(NETOBJS) $(DEPLIBS) check_pgsql_DEPENDENCIES = check_pgsql.c $(NETOBJS) $(DEPLIBS) @@ -123,6 +126,7 @@ check_ssh_DEPENDENCIES = check_ssh.c $(NETOBJS) $(DEPLIBS) check_swap_DEPENDENCIES = check_swap.c $(BASEOBJS) popen.o $(DEPLIBS) 
check_tcp_DEPENDENCIES = check_tcp.c $(SSLOBJS) $(NETOBJS) $(DEPLIBS) check_time_DEPENDENCIES = check_time.c $(NETOBJS) $(DEPLIBS) +check_ntp_time_DEPENDENCIES = check_ntp_time.c $(NETOBJS) $(DEPLIBS) check_ups_DEPENDENCIES = check_ups.c $(NETOBJS) $(DEPLIBS) check_users_DEPENDENCIES = check_users.c $(BASEOBJS) popen.o $(DEPLIBS) check_by_ssh_DEPENDENCIES = check_by_ssh.c $(NETOBJS) runcmd.o $(DEPLIBS) diff --git a/plugins/check_ntp_peer.c b/plugins/check_ntp_peer.c index 164d5190..1c4702c8 100644 --- a/plugins/check_ntp_peer.c +++ b/plugins/check_ntp_peer.c @@ -1,6 +1,6 @@ /****************************************************************************** * -* Nagios check_ntp plugin +* Nagios check_ntp_peer plugin * * License: GPL * Copyright (c) 2006 sean finney <seanius@seanius.net> @@ -10,11 +10,16 @@ * * Description: * -* This file contains the check_ntp plugin +* This file contains the check_ntp_peer plugin * -* This plugin to check ntp servers independant of any commandline +* This plugin checks an NTP server independent of any commandline * programs or external libraries. * +* Use this plugin to check the health of an NTP server. It supports +* checking the offset with the sync peer, the jitter and stratum. This +* plugin will not check the clock offset between the local host and NTP +* server; please use check_ntp_time for that purpose. 
+* * * License Information: * @@ -36,7 +41,7 @@ *****************************************************************************/ -const char *progname = "check_ntp"; +const char *progname = "check_ntp_peer"; const char *revision = "$Revision$"; const char *copyright = "2007"; const char *email = "nagiosplug-devel@lists.sourceforge.net"; @@ -47,51 +52,28 @@ const char *email = "nagiosplug-devel@lists.sourceforge.net"; static char *server_address=NULL; static int verbose=0; +static int quiet=0; static short do_offset=0; static char *owarn="60"; static char *ocrit="120"; +static short do_stratum=0; +static char *swarn="-1:16"; +static char *scrit="-1:16"; static short do_jitter=0; -static char *jwarn="5000"; -static char *jcrit="10000"; +static char *jwarn="-1:5000"; +static char *jcrit="-1:10000"; +static int syncsource_found=0; int process_arguments (int, char **); thresholds *offset_thresholds = NULL; thresholds *jitter_thresholds = NULL; +thresholds *stratum_thresholds = NULL; void print_help (void); void print_usage (void); -/* number of times to perform each request to get a good average. */ -#define AVG_NUM 4 - /* max size of control message data */ #define MAX_CM_SIZE 468 -/* this structure holds everything in an ntp request/response as per rfc1305 */ -typedef struct { - uint8_t flags; /* byte with leapindicator,vers,mode. see macros */ - uint8_t stratum; /* clock stratum */ - int8_t poll; /* polling interval */ - int8_t precision; /* precision of the local clock */ - int32_t rtdelay; /* total rt delay, as a fixed point num. see macros */ - uint32_t rtdisp; /* like above, but for max err to primary src */ - uint32_t refid; /* ref clock identifier */ - uint64_t refts; /* reference timestamp. 
local time local clock */ - uint64_t origts; /* time at which request departed client */ - uint64_t rxts; /* time at which request arrived at server */ - uint64_t txts; /* time at which request departed server */ -} ntp_message; - -/* this structure holds data about results from querying offset from a peer */ -typedef struct { - time_t waiting; /* ts set when we started waiting for a response */ - int num_responses; /* number of successfully recieved responses */ - uint8_t stratum; /* copied verbatim from the ntp_message */ - double rtdelay; /* converted from the ntp_message */ - double rtdisp; /* converted from the ntp_message */ - double offset[AVG_NUM]; /* offsets from each response */ - uint8_t flags; /* byte with leapindicator,vers,mode. see macros */ -} ntp_server_results; - /* this structure holds everything in an ntp control message as per rfc1305 */ typedef struct { uint8_t flags; /* byte with leapindicator,vers,mode. see macros */ @@ -147,57 +129,6 @@ typedef struct { #define PEER_INCLUDED 0x04 #define PEER_SYNCSOURCE 0x06 -/** - ** a note about the 32-bit "fixed point" numbers: - ** - they are divided into halves, each being a 16-bit int in network byte order: - - the first 16 bits are an int on the left side of a decimal point. - - the second 16 bits represent a fraction n/(2^16) - likewise for the 64-bit "fixed point" numbers with everything doubled :) - **/ - -/* macros to access the left/right 16 bits of a 32-bit ntp "fixed point" - number. note that these can be used as lvalues too */ -#define L16(x) (((uint16_t*)&x)[0]) -#define R16(x) (((uint16_t*)&x)[1]) -/* macros to access the left/right 32 bits of a 64-bit ntp "fixed point" - number. these too can be used as lvalues */ -#define L32(x) (((uint32_t*)&x)[0]) -#define R32(x) (((uint32_t*)&x)[1]) - -/* ntp wants seconds since 1/1/00, epoch is 1/1/70. 
this is the difference */ -#define EPOCHDIFF 0x83aa7e80UL - -/* extract a 32-bit ntp fixed point number into a double */ -#define NTP32asDOUBLE(x) (ntohs(L16(x)) + (double)ntohs(R16(x))/65536.0) - -/* likewise for a 64-bit ntp fp number */ -#define NTP64asDOUBLE(n) (double)(((uint64_t)n)?\ - (ntohl(L32(n))-EPOCHDIFF) + \ - (.00000001*(0.5+(double)(ntohl(R32(n))/42.94967296))):\ - 0) - -/* convert a struct timeval to a double */ -#define TVasDOUBLE(x) (double)(x.tv_sec+(0.000001*x.tv_usec)) - -/* convert an ntp 64-bit fp number to a struct timeval */ -#define NTP64toTV(n,t) \ - do{ if(!n) t.tv_sec = t.tv_usec = 0; \ - else { \ - t.tv_sec=ntohl(L32(n))-EPOCHDIFF; \ - t.tv_usec=(int)(0.5+(double)(ntohl(R32(n))/4294.967296)); \ - } \ - }while(0) - -/* convert a struct timeval to an ntp 64-bit fp number */ -#define TVtoNTP64(t,n) \ - do{ if(!t.tv_usec && !t.tv_sec) n=0x0UL; \ - else { \ - L32(n)=htonl(t.tv_sec + EPOCHDIFF); \ - R32(n)=htonl((uint64_t)((4294.967296*t.tv_usec)+.5)); \ - } \ - } while(0) - /* NTP control message header is 12 bytes, plus any data in the data * field, plus null padding to the nearest 32-bit boundary per rfc. 
*/ @@ -210,42 +141,6 @@ typedef struct { printf("%u.%u.%u.%u", (x>>24)&0xff, (x>>16)&0xff, (x>>8)&0xff, x&0xff);\ }while(0); -/* calculate the offset of the local clock */ -static inline double calc_offset(const ntp_message *m, const struct timeval *t){ - double client_tx, peer_rx, peer_tx, client_rx; - client_tx = NTP64asDOUBLE(m->origts); - peer_rx = NTP64asDOUBLE(m->rxts); - peer_tx = NTP64asDOUBLE(m->txts); - client_rx=TVasDOUBLE((*t)); - return (.5*((peer_tx-client_rx)+(peer_rx-client_tx))); -} - -/* print out a ntp packet in human readable/debuggable format */ -void print_ntp_message(const ntp_message *p){ - struct timeval ref, orig, rx, tx; - - NTP64toTV(p->refts,ref); - NTP64toTV(p->origts,orig); - NTP64toTV(p->rxts,rx); - NTP64toTV(p->txts,tx); - - printf("packet contents:\n"); - printf("\tflags: 0x%.2x\n", p->flags); - printf("\t li=%d (0x%.2x)\n", LI(p->flags), p->flags&LI_MASK); - printf("\t vn=%d (0x%.2x)\n", VN(p->flags), p->flags&VN_MASK); - printf("\t mode=%d (0x%.2x)\n", MODE(p->flags), p->flags&MODE_MASK); - printf("\tstratum = %d\n", p->stratum); - printf("\tpoll = %g\n", pow(2, p->poll)); - printf("\tprecision = %g\n", pow(2, p->precision)); - printf("\trtdelay = %-.16g\n", NTP32asDOUBLE(p->rtdelay)); - printf("\trtdisp = %-.16g\n", NTP32asDOUBLE(p->rtdisp)); - printf("\trefid = %x\n", p->refid); - printf("\trefts = %-.16g\n", NTP64asDOUBLE(p->refts)); - printf("\torigts = %-.16g\n", NTP64asDOUBLE(p->origts)); - printf("\trxts = %-.16g\n", NTP64asDOUBLE(p->rxts)); - printf("\ttxts = %-.16g\n", NTP64asDOUBLE(p->txts)); -} - void print_ntp_control_message(const ntp_control_message *p){ int i=0, numpeers=0; const ntp_assoc_status_pair *peer=NULL; @@ -282,222 +177,25 @@ void print_ntp_control_message(const ntp_control_message *p){ } } -void setup_request(ntp_message *p){ - struct timeval t; - - memset(p, 0, sizeof(ntp_message)); - LI_SET(p->flags, LI_ALARM); - VN_SET(p->flags, 4); - MODE_SET(p->flags, MODE_CLIENT); - p->poll=4; - 
p->precision=(int8_t)0xfa; - L16(p->rtdelay)=htons(1); - L16(p->rtdisp)=htons(1); - - gettimeofday(&t, NULL); - TVtoNTP64(t,p->txts); -} - -/* select the "best" server from a list of servers, and return its index. - * this is done by filtering servers based on stratum, dispersion, and - * finally round-trip delay. */ -int best_offset_server(const ntp_server_results *slist, int nservers){ - int i=0, j=0, cserver=0, candidates[5], csize=0; - - /* for each server */ - for(cserver=0; cserver<nservers; cserver++){ - /* sort out servers with error flags */ - if ( LI(slist[cserver].flags) != LI_NOWARNING ){ - if (verbose) printf("discarding peer id %d: flags=%d\n", cserver, LI(slist[cserver].flags)); - break; - } - - /* compare it to each of the servers already in the candidate list */ - for(i=0; i<csize; i++){ - /* does it have an equal or better stratum? */ - if(slist[cserver].stratum <= slist[i].stratum){ - /* does it have an equal or better dispersion? */ - if(slist[cserver].rtdisp <= slist[i].rtdisp){ - /* does it have a better rtdelay? */ - if(slist[cserver].rtdelay < slist[i].rtdelay){ - break; - } - } - } - } - - /* if we haven't reached the current list's end, move everyone - * over one to the right, and insert the new candidate */ - if(i<csize){ - for(j=5; j>i; j--){ - candidates[j]=candidates[j-1]; - } - } - /* regardless, if they should be on the list... */ - if(i<5) { - candidates[i]=cserver; - if(csize<5) csize++; - /* otherwise discard the server */ - } else { - DBG(printf("discarding peer id %d\n", cserver)); - } - } - - if(csize>0) { - DBG(printf("best server selected: peer %d\n", candidates[0])); - return candidates[0]; - } else { - DBG(printf("no peers meeting synchronization criteria :(\n")); - return -1; - } -} - -/* do everything we need to get the total average offset - * - we use a certain amount of parallelization with poll() to ensure - * we don't waste time sitting around waiting for single packets. 
- * - we also "manually" handle resolving host names and connecting, because - * we have to do it in a way that our lazy macros don't handle currently :( */ -double offset_request(const char *host, int *status){ - int i=0, j=0, ga_result=0, num_hosts=0, *socklist=NULL, respnum=0; - int servers_completed=0, one_written=0, one_read=0, servers_readable=0, best_index=-1; - time_t now_time=0, start_ts=0; - ntp_message *req=NULL; - double avg_offset=0.; - struct timeval recv_time; - struct addrinfo *ai=NULL, *ai_tmp=NULL, hints; - struct pollfd *ufds=NULL; - ntp_server_results *servers=NULL; - - /* setup hints to only return results from getaddrinfo that we'd like */ - memset(&hints, 0, sizeof(struct addrinfo)); - hints.ai_family = address_family; - hints.ai_protocol = IPPROTO_UDP; - hints.ai_socktype = SOCK_DGRAM; - - /* fill in ai with the list of hosts resolved by the host name */ - ga_result = getaddrinfo(host, "123", &hints, &ai); - if(ga_result!=0){ - die(STATE_UNKNOWN, "error getting address for %s: %s\n", - host, gai_strerror(ga_result)); - } - - /* count the number of returned hosts, and allocate stuff accordingly */ - for(ai_tmp=ai; ai_tmp!=NULL; ai_tmp=ai_tmp->ai_next){ num_hosts++; } - req=(ntp_message*)malloc(sizeof(ntp_message)*num_hosts); - if(req==NULL) die(STATE_UNKNOWN, "can not allocate ntp message array"); - socklist=(int*)malloc(sizeof(int)*num_hosts); - if(socklist==NULL) die(STATE_UNKNOWN, "can not allocate socket array"); - ufds=(struct pollfd*)malloc(sizeof(struct pollfd)*num_hosts); - if(ufds==NULL) die(STATE_UNKNOWN, "can not allocate socket array"); - servers=(ntp_server_results*)malloc(sizeof(ntp_server_results)*num_hosts); - if(servers==NULL) die(STATE_UNKNOWN, "can not allocate server array"); - memset(servers, 0, sizeof(ntp_server_results)*num_hosts); - - /* setup each socket for writing, and the corresponding struct pollfd */ - ai_tmp=ai; - for(i=0;ai_tmp;i++){ - socklist[i]=socket(ai_tmp->ai_family, SOCK_DGRAM, IPPROTO_UDP); - 
if(socklist[i] == -1) { - perror(NULL); - die(STATE_UNKNOWN, "can not create new socket"); - } - if(connect(socklist[i], ai_tmp->ai_addr, ai_tmp->ai_addrlen)){ - die(STATE_UNKNOWN, "can't create socket connection"); - } else { - ufds[i].fd=socklist[i]; - ufds[i].events=POLLIN; - ufds[i].revents=0; - } - ai_tmp = ai_tmp->ai_next; - } - - /* now do AVG_NUM checks to each host. we stop before timeout/2 seconds - * have passed in order to ensure post-processing and jitter time. */ - now_time=start_ts=time(NULL); - while(servers_completed<num_hosts && now_time-start_ts <= socket_timeout/2){ - /* loop through each server and find each one which hasn't - * been touched in the past second or so and is still lacking - * some responses. for each of these servers, send a new request, - * and update the "waiting" timestamp with the current time. */ - one_written=0; - now_time=time(NULL); - - for(i=0; i<num_hosts; i++){ - if(servers[i].waiting<now_time && servers[i].num_responses<AVG_NUM){ - if(verbose && servers[i].waiting != 0) printf("re-"); - if(verbose) printf("sending request to peer %d\n", i); - setup_request(&req[i]); - write(socklist[i], &req[i], sizeof(ntp_message)); - servers[i].waiting=now_time; - one_written=1; - break; - } - } - - /* quickly poll for any sockets with pending data */ - servers_readable=poll(ufds, num_hosts, 100); - if(servers_readable==-1){ - perror("polling ntp sockets"); - die(STATE_UNKNOWN, "communication errors"); - } - - /* read from any sockets with pending data */ - for(i=0; servers_readable && i<num_hosts; i++){ - if(ufds[i].revents&POLLIN && servers[i].num_responses < AVG_NUM){ - if(verbose) { - printf("response from peer %d: ", i); - } +char *extract_value(const char *varlist, const char *name){ + char *tmpvarlist=NULL, *tmpkey=NULL, *value=NULL; + int last=0; - read(ufds[i].fd, &req[i], sizeof(ntp_message)); - gettimeofday(&recv_time, NULL); - DBG(print_ntp_message(&req[i])); - respnum=servers[i].num_responses++; - 
servers[i].offset[respnum]=calc_offset(&req[i], &recv_time); - if(verbose) { - printf("offset %.10g\n", servers[i].offset[respnum]); - } - servers[i].stratum=req[i].stratum; - servers[i].rtdisp=NTP32asDOUBLE(req[i].rtdisp); - servers[i].rtdelay=NTP32asDOUBLE(req[i].rtdelay); - servers[i].waiting=0; - servers[i].flags=req[i].flags; - servers_readable--; - one_read = 1; - if(servers[i].num_responses==AVG_NUM) servers_completed++; - } - } - /* lather, rinse, repeat. */ - } + /* The following code require a non-empty varlist */ + if(strlen(varlist) == 0) + return NULL; - if (one_read == 0) { - die(STATE_CRITICAL, "NTP CRITICAL: No response from NTP server\n"); - } + tmpvarlist = strdup(varlist); + tmpkey = strtok(tmpvarlist, "="); - /* now, pick the best server from the list */ - best_index=best_offset_server(servers, num_hosts); - if(best_index < 0){ - *status=STATE_UNKNOWN; - } else { - /* finally, calculate the average offset */ - for(i=0; i<servers[best_index].num_responses;i++){ - avg_offset+=servers[best_index].offset[j]; + do { + if(strstr(tmpkey, name) != NULL) { + value = strtok(NULL, ","); + last = 1; } - avg_offset/=servers[best_index].num_responses; - } + } while (last == 0 && (tmpkey = strtok(NULL, "="))); - /* cleanup */ - /* FIXME: Not closing the socket to avoid re-use of the local port - * which can cause old NTP packets to be read instead of NTP control - * pactets in jitter_request(). THERE MUST BE ANOTHER WAY... 
- * for(j=0; j<num_hosts; j++){ close(socklist[j]); } */ - free(socklist); - free(ufds); - free(servers); - free(req); - freeaddrinfo(ai); - - if(verbose) printf("overall average offset: %.10g\n", avg_offset); - return avg_offset; + return value; } void @@ -511,20 +209,36 @@ setup_control_request(ntp_control_message *p, uint8_t opcode, uint16_t seq){ /* Remaining fields are zero for requests */ } -/* XXX handle responses with the error bit set */ -double jitter_request(const char *host, int *status){ - int conn=-1, i, npeers=0, num_candidates=0, syncsource_found=0; - int run=0, min_peer_sel=PEER_INCLUDED, num_selected=0, num_valid=0; +/* This function does all the actual work; roughly here's what it does + * beside setting the offest, jitter and stratum passed as argument: + * - offset can be negative, so if it cannot get the offset, offset_result + * is set to UNKNOWN, otherwise OK. + * - jitter and stratum are set to -1 if they cannot be retrieved so any + * positive value means a success retrieving the value. + * - status is set to WARNING if there's no sync.peer (otherwise OK) and is + * the return value of the function. + * status is pretty much useless as syncsource_found is a global variable + * used later in main to check is the server was synchronized. 
It works + * so I left it alone */ +int ntp_request(const char *host, double *offset, int *offset_result, double *jitter, int *stratum){ + int conn=-1, i, npeers=0, num_candidates=0; + double tmp_offset = 0; + int min_peer_sel=PEER_INCLUDED; int peers_size=0, peer_offset=0; + int status; ntp_assoc_status_pair *peers=NULL; ntp_control_message req; - const char *getvar = "jitter"; - double rval = 0.0, jitter = -1.0; - char *startofvalue=NULL, *nptr=NULL; + const char *getvar = "stratum,offset,jitter"; + char *data, *value, *nptr; void *tmp; + status = STATE_OK; + *offset_result = STATE_UNKNOWN; + *jitter = *stratum = -1; + /* Long-winded explanation: - * Getting the jitter requires a number of steps: + * Getting the sync peer offset, jitter and stratum requires a number of + * steps: * 1) Send a READSTAT request. * 2) Interpret the READSTAT reply * a) The data section contains a list of peer identifiers (16 bits) @@ -535,7 +249,8 @@ double jitter_request(const char *host, int *status){ * set a minimum of warning. * 3) Send a READVAR request for information on each peer identified * in 2b greater than the minimum selection value. - * 4) Extract the jitter value from the data[] (it's ASCII) + * 4) Extract the offset, jitter and stratum value from the data[] + * (it's ASCII) */ my_udp_connect(server_address, 123, &conn); @@ -564,7 +279,7 @@ double jitter_request(const char *host, int *status){ } while(req.op&REM_MORE); /* first, let's find out if we have a sync source, or if there are - * at least some candidates. in the case of the latter we'll issue + * at least some candidates. In the latter case we'll issue * a warning but go ahead with the check on them. */ for (i = 0; i < npeers; i++){ if (PEER_SEL(peers[i].status) >= PEER_INCLUDED){ @@ -578,21 +293,22 @@ double jitter_request(const char *host, int *status){ if(verbose) printf("%d candiate peers available\n", num_candidates); if(verbose && syncsource_found) printf("synchronization source found\n"); if(! 
syncsource_found){ - *status = STATE_UNKNOWN; + status = STATE_WARNING; if(verbose) printf("warning: no synchronization source found\n"); } - for (run=0; run<AVG_NUM; run++){ - if(verbose) printf("jitter run %d of %d\n", run+1, AVG_NUM); - for (i = 0; i < npeers; i++){ - /* Only query this server if it is the current sync source */ - if (PEER_SEL(peers[i].status) >= min_peer_sel){ - num_selected++; + for (i = 0; i < npeers; i++){ + /* Only query this server if it is the current sync source */ + /* If there's no sync.peer, query all candidates and use the best one */ + if (PEER_SEL(peers[i].status) >= min_peer_sel){ + if(verbose) printf("Getting offset, jitter and stratum for peer %.2x\n", ntohs(peers[i].assoc)); + asprintf(&data, ""); + do{ setup_control_request(&req, OP_READVAR, 2); req.assoc = peers[i].assoc; - /* By spec, putting the variable name "jitter" in the request - * should cause the server to provide _only_ the jitter value. + /* Putting the wanted variable names in the request + * cause the server to provide _only_ the requested values. * thus reducing net traffic, guaranteeing us only a single * datagram in reply, and making intepretation much simpler */ @@ -605,48 +321,97 @@ double jitter_request(const char *host, int *status){ DBG(print_ntp_control_message(&req)); req.count = htons(MAX_CM_SIZE); - DBG(printf("recieving READVAR response...\n")); + DBG(printf("receiving READVAR response...\n")); read(conn, &req, SIZEOF_NTPCM(req)); DBG(print_ntp_control_message(&req)); - if(req.op&REM_ERROR && strstr(getvar, "jitter")) { - if(verbose) printf("The 'jitter' command failed (old ntp server?)\nRestarting with 'dispersion'...\n"); - getvar = "dispersion"; - num_selected--; + if(!(req.op&REM_ERROR)) + asprintf(&data, "%s%s", data, req.data); + } while(req.op&REM_MORE); + + if(req.op&REM_ERROR) { + if(strstr(getvar, "jitter")) { + if(verbose) printf("The command failed. This is usually caused by servers refusing the 'jitter'\nvariable. 
Restarting with 'dispersion'...\n"); + getvar = "stratum,offset,dispersion"; + i--; + continue; + } else if(strlen(getvar)) { + if(verbose) printf("Server didn't like dispersion either; will retrieve everything\n"); + getvar = ""; i--; continue; } + } + + if(verbose > 1) + printf("Server responded: >>>%s<<<\n", data); + + /* get the offset */ + if(verbose) + printf("parsing offset from peer %.2x: ", ntohs(peers[i].assoc)); + + value = extract_value(data, "offset"); + nptr=NULL; + /* Convert the value if we have one */ + if(value != NULL) + tmp_offset = strtod(value, &nptr) / 1000; + /* If value is null or no conversion was performed */ + if(value == NULL || value==nptr) { + if(verbose) printf("error: unable to read server offset response.\n"); + } else { + if(verbose) printf("%.10g\n", tmp_offset); + if(*offset_result == STATE_UNKNOWN || fabs(tmp_offset) < fabs(*offset)) { + *offset = tmp_offset; + *offset_result = STATE_OK; + } else { + /* Skip this one; move to the next */ + continue; + } + } - /* get to the float value */ + if(do_jitter) { + /* get the jitter */ if(verbose) { - printf("parsing jitter from peer %.2x: ", ntohs(peers[i].assoc)); + printf("parsing %s from peer %.2x: ", strstr(getvar, "dispersion") != NULL ? "dispersion" : "jitter", ntohs(peers[i].assoc)); } - startofvalue = strchr(req.data, '='); - if(startofvalue != NULL) { - startofvalue++; - jitter = strtod(startofvalue, &nptr); + value = extract_value(data, strstr(getvar, "dispersion") != NULL ? 
"dispersion" : "jitter"); + nptr=NULL; + /* Convert the value if we have one */ + if(value != NULL) + *jitter = strtod(value, &nptr); + /* If value is null or no conversion was performed */ + if(value == NULL || value==nptr) { + if(verbose) printf("error: unable to read server jitter/dispersion response.\n"); + *jitter = -1; + } else if(verbose) { + printf("%.10g\n", *jitter); + } + } + + if(do_stratum) { + /* get the stratum */ + if(verbose) { + printf("parsing stratum from peer %.2x: ", ntohs(peers[i].assoc)); } - if(startofvalue == NULL || startofvalue==nptr){ - printf("warning: unable to read server jitter response.\n"); - *status = STATE_UNKNOWN; + value = extract_value(data, "stratum"); + nptr=NULL; + /* Convert the value if we have one */ + if(value != NULL) + *stratum = strtol(value, &nptr, 10); + if(value == NULL || value==nptr) { + if(verbose) printf("error: unable to read server stratum response.\n"); + *stratum = -1; } else { - if(verbose) printf("%g\n", jitter); - num_valid++; - rval += jitter; + if(verbose) printf("%i\n", *stratum); } } - } - if(verbose){ - printf("jitter parsed from %d/%d peers\n", num_valid, num_selected); - } - } - - rval = num_valid ? 
rval / num_valid : -1.0; + } /* if (PEER_SEL(peers[i].status) >= min_peer_sel) */ + } /* for (i = 0; i < npeers; i++) */ close(conn); if(peers!=NULL) free(peers); - /* If we return -1.0, it means no synchronization source was found */ - return rval; + + return status; } int process_arguments(int argc, char **argv){ @@ -658,8 +423,11 @@ int process_arguments(int argc, char **argv){ {"verbose", no_argument, 0, 'v'}, {"use-ipv4", no_argument, 0, '4'}, {"use-ipv6", no_argument, 0, '6'}, + {"quiet", no_argument, 0, 'q'}, {"warning", required_argument, 0, 'w'}, {"critical", required_argument, 0, 'c'}, + {"swarn", required_argument, 0, 'W'}, + {"scrit", required_argument, 0, 'C'}, {"jwarn", required_argument, 0, 'j'}, {"jcrit", required_argument, 0, 'k'}, {"timeout", required_argument, 0, 't'}, @@ -672,7 +440,7 @@ int process_arguments(int argc, char **argv){ usage ("\n"); while (1) { - c = getopt_long (argc, argv, "Vhv46w:c:j:k:t:H:", longopts, &option); + c = getopt_long (argc, argv, "Vhv46qw:c:W:C:j:k:t:H:", longopts, &option); if (c == -1 || c == EOF || c == 1) break; @@ -688,6 +456,9 @@ int process_arguments(int argc, char **argv){ case 'v': verbose++; break; + case 'q': + quiet = 1; + break; case 'w': do_offset=1; owarn = optarg; @@ -696,6 +467,14 @@ int process_arguments(int argc, char **argv){ do_offset=1; ocrit = optarg; break; + case 'W': + do_stratum=1; + swarn = optarg; + break; + case 'C': + do_stratum=1; + scrit = optarg; + break; case 'j': do_jitter=1; jwarn = optarg; @@ -746,24 +525,31 @@ char *perfd_offset (double offset) char *perfd_jitter (double jitter) { - return fperfdata ("jitter", jitter, "s", + return fperfdata ("jitter", jitter, "", do_jitter, jitter_thresholds->warning->end, do_jitter, jitter_thresholds->critical->end, TRUE, 0, FALSE, 0); } +char *perfd_stratum (int stratum) +{ + return perfdata ("stratum", stratum, "", + do_stratum, (int)stratum_thresholds->warning->end, + do_stratum, (int)stratum_thresholds->critical->end, + TRUE, 0, TRUE, 
16); +} + int main(int argc, char *argv[]){ - int result, offset_result, jitter_result; + int result, offset_result, stratum; double offset=0, jitter=0; char *result_line, *perfdata_line; - result = offset_result = jitter_result = STATE_OK; - if (process_arguments (argc, argv) == ERROR) usage4 (_("Could not parse arguments")); set_thresholds(&offset_thresholds, owarn, ocrit); set_thresholds(&jitter_thresholds, jwarn, jcrit); + set_thresholds(&stratum_thresholds, swarn, scrit); /* initialize alarm signal handling */ signal (SIGALRM, socket_timeout_alarm_handler); @@ -771,29 +557,24 @@ int main(int argc, char *argv[]){ /* set socket timeout */ alarm (socket_timeout); - offset = offset_request(server_address, &offset_result); - /* check_ntp used to always return CRITICAL if offset_result == STATE_UNKNOWN. - * Now we'll only do that is the offset thresholds were set */ - if (do_offset && offset_result == STATE_UNKNOWN) { - result = STATE_CRITICAL; + /* This returns either OK or WARNING (See comment preceeding ntp_request) */ + result = ntp_request(server_address, &offset, &offset_result, &jitter, &stratum); + + if(offset_result == STATE_UNKNOWN) { + /* if there's no sync peer (this overrides ntp_request output): */ + result = (quiet == 1 ? STATE_UNKNOWN : STATE_CRITICAL); } else { - result = get_status(fabs(offset), offset_thresholds); + /* Be quiet if there's no candidates either */ + if (quiet == 1 && result == STATE_WARNING) + result = STATE_UNKNOWN; + result = max_state_alt(result, get_status(fabs(offset), offset_thresholds)); } - /* If not told to check the jitter, we don't even send packets. - * jitter is checked using NTP control packets, which not all - * servers recognize. 
Trying to check the jitter on OpenNTPD - * (for example) will result in an error - */ - if(do_jitter){ - jitter=jitter_request(server_address, &jitter_result); + if(do_stratum) + result = max_state_alt(result, get_status(stratum, stratum_thresholds)); + + if(do_jitter) result = max_state_alt(result, get_status(jitter, jitter_thresholds)); - /* -1 indicates that we couldn't calculate the jitter - * Only overrides STATE_OK from the offset */ - if(jitter == -1.0 && result == STATE_OK) - result = STATE_UNKNOWN; - } - result = max_state_alt(result, jitter_result); switch (result) { case STATE_CRITICAL : @@ -809,6 +590,9 @@ int main(int argc, char *argv[]){ asprintf(&result_line, "NTP UNKNOWN:"); break; } + if(!syncsource_found) + asprintf(&result_line, "%s %s,", result_line, _("Server not synchronized")); + if(offset_result == STATE_UNKNOWN){ asprintf(&result_line, "%s %s", result_line, _("Offset unknown")); asprintf(&perfdata_line, ""); @@ -818,7 +602,11 @@ int main(int argc, char *argv[]){ } if (do_jitter) { asprintf(&result_line, "%s, jitter=%f", result_line, jitter); - asprintf(&perfdata_line, "%s %s", perfdata_line, perfd_jitter(jitter)); + asprintf(&perfdata_line, "%s %s", perfdata_line, perfd_jitter(jitter)); + } + if (do_stratum) { + asprintf(&result_line, "%s, stratum=%i", result_line, stratum); + asprintf(&perfdata_line, "%s %s", perfdata_line, perfd_stratum(stratum)); } printf("%s|%s\n", result_line, perfdata_line); @@ -833,18 +621,24 @@ void print_help(void){ printf ("Copyright (c) 2006 Sean Finney\n"); printf (COPYRIGHT, copyright, email); - - printf ("%s\n", _("This plugin checks the selected ntp server")); - printf ("\n\n"); - + printf ("%s\n", _("This plugin checks the selected ntp server")); + + printf ("\n\n"); + print_usage(); printf (_(UT_HELP_VRSN)); printf (_(UT_HOST_PORT), 'p', "123"); + printf (" %s\n", "-q, --quiet"); + printf (" %s\n", _("Returns UNKNOWN instead of CRITICAL or WARNING if server isn't synchronized")); printf (" %s\n", "-w, 
--warning=THRESHOLD"); printf (" %s\n", _("Offset to result in warning status (seconds)")); printf (" %s\n", "-c, --critical=THRESHOLD"); printf (" %s\n", _("Offset to result in critical status (seconds)")); + printf (" %s\n", "-W, --warning=THRESHOLD"); + printf (" %s\n", _("Warning threshold for stratum")); + printf (" %s\n", "-C, --critical=THRESHOLD"); + printf (" %s\n", _("Critical threshold for stratum")); printf (" %s\n", "-j, --warning=THRESHOLD"); printf (" %s\n", _("Warning threshold for jitter")); printf (" %s\n", "-k, --critical=THRESHOLD"); @@ -854,17 +648,26 @@ void print_help(void){ printf("\n"); printf("%s\n", _("Notes:")); + printf(" %s\n", _("This plugin checks an NTP server independent of any commandline")); + printf(" %s\n\n", _("programs or external libraries.")); + printf(" %s\n", _("Use this plugin to check the health of an NTP server. It supports")); + printf(" %s\n", _("checking the offset with the sync peer, the jitter and stratum. This")); + printf(" %s\n", _("plugin will not check the clock offset between the local host and NTP")); + printf(" %s\n\n", _("server; please use check_ntp_time for that purpose.")); + printf(" %s\n", _("See:")); printf(" %s\n", ("http://nagiosplug.sourceforge.net/developer-guidelines.html#THRESHOLDFORMAT")); printf(" %s\n", _("for THRESHOLD format and examples.")); printf("\n"); printf("%s\n", _("Examples:")); - printf(" %s\n", _("Normal offset check:")); - printf(" %s\n", ("./check_ntp -H ntpserv -w 0.5 -c 1")); + printf(" %s\n", _("Normal NTP server check:")); + printf(" %s\n", ("./check_ntp_peer -H ntpserv -w 0.5 -c 1")); printf(" %s\n", _("Check jitter too, avoiding critical notifications if jitter isn't available")); printf(" %s\n", _("(See Notes above for more details on thresholds formats):")); - printf(" %s\n", ("./check_ntp -H ntpserv -w 0.5 -c 1 -j -1:100 -k -1:200")); + printf(" %s\n", ("./check_ntp_peer -H ntpserv -w 0.5 -c 1 -j -1:100 -k -1:200")); + printf(" %s\n", _("Check only stratum:")); +
printf(" %s\n", ("./check_ntp_peer -H ntpserv -W 4 -C 6")); printf (_(UT_SUPPORT)); } @@ -872,6 +675,7 @@ void print_help(void){ void print_usage(void) { - printf (_("Usage:")); - printf(" %s -H <host> [-w <warn>] [-c <crit>] [-j <warn>] [-k <crit>] [-v verbose]\n", progname); + printf (_("Usage:")); + printf(" %s -H <host> [-w <warn>] [-c <crit>] [-W <warn>] [-C <crit>]\n", progname); + printf(" [-j <warn>] [-k <crit>] [-v verbose]\n"); } diff --git a/plugins/check_ntp_time.c b/plugins/check_ntp_time.c index 164d5190..22e78fba 100644 --- a/plugins/check_ntp_time.c +++ b/plugins/check_ntp_time.c @@ -1,6 +1,6 @@ /****************************************************************************** * -* Nagios check_ntp plugin +* Nagios check_ntp_time plugin * * License: GPL * Copyright (c) 2006 sean finney <seanius@seanius.net> @@ -10,10 +10,14 @@ * * Description: * -* This file contains the check_ntp plugin +* This file contains the check_ntp_time plugin * -* This plugin to check ntp servers independant of any commandline -* programs or external libraries. +* This plugin checks the clock offset between the local host and a +* remote NTP server. It is independent of any commandline programs or +* external libraries. +* +* If you'd rather want to monitor an NTP server, please use +* check_ntp_peer. * * * License Information: @@ -47,16 +51,12 @@ const char *email = "nagiosplug-devel@lists.sourceforge.net"; static char *server_address=NULL; static int verbose=0; -static short do_offset=0; +static int quiet=0; static char *owarn="60"; static char *ocrit="120"; -static short do_jitter=0; -static char *jwarn="5000"; -static char *jcrit="10000"; int process_arguments (int, char **); thresholds *offset_thresholds = NULL; -thresholds *jitter_thresholds = NULL; void print_help (void); void print_usage (void); @@ -92,25 +92,6 @@ typedef struct { uint8_t flags; /* byte with leapindicator,vers,mode. 
see macros */ } ntp_server_results; -/* this structure holds everything in an ntp control message as per rfc1305 */ -typedef struct { - uint8_t flags; /* byte with leapindicator,vers,mode. see macros */ - uint8_t op; /* R,E,M bits and Opcode */ - uint16_t seq; /* Packet sequence */ - uint16_t status; /* Clock status */ - uint16_t assoc; /* Association */ - uint16_t offset; /* Similar to TCP sequence # */ - uint16_t count; /* # bytes of data */ - char data[MAX_CM_SIZE]; /* ASCII data of the request */ - /* NB: not necessarily NULL terminated! */ -} ntp_control_message; - -/* this is an association/status-word pair found in control packet reponses */ -typedef struct { - uint16_t assoc; - uint16_t status; -} ntp_assoc_status_pair; - /* bits 1,2 are the leap indicator */ #define LI_MASK 0xc0 #define LI(x) ((x&LI_MASK)>>6) @@ -246,42 +227,6 @@ void print_ntp_message(const ntp_message *p){ printf("\ttxts = %-.16g\n", NTP64asDOUBLE(p->txts)); } -void print_ntp_control_message(const ntp_control_message *p){ - int i=0, numpeers=0; - const ntp_assoc_status_pair *peer=NULL; - - printf("control packet contents:\n"); - printf("\tflags: 0x%.2x , 0x%.2x\n", p->flags, p->op); - printf("\t li=%d (0x%.2x)\n", LI(p->flags), p->flags&LI_MASK); - printf("\t vn=%d (0x%.2x)\n", VN(p->flags), p->flags&VN_MASK); - printf("\t mode=%d (0x%.2x)\n", MODE(p->flags), p->flags&MODE_MASK); - printf("\t response=%d (0x%.2x)\n", (p->op&REM_RESP)>0, p->op&REM_RESP); - printf("\t more=%d (0x%.2x)\n", (p->op&REM_MORE)>0, p->op&REM_MORE); - printf("\t error=%d (0x%.2x)\n", (p->op&REM_ERROR)>0, p->op&REM_ERROR); - printf("\t op=%d (0x%.2x)\n", p->op&OP_MASK, p->op&OP_MASK); - printf("\tsequence: %d (0x%.2x)\n", ntohs(p->seq), ntohs(p->seq)); - printf("\tstatus: %d (0x%.2x)\n", ntohs(p->status), ntohs(p->status)); - printf("\tassoc: %d (0x%.2x)\n", ntohs(p->assoc), ntohs(p->assoc)); - printf("\toffset: %d (0x%.2x)\n", ntohs(p->offset), ntohs(p->offset)); - printf("\tcount: %d (0x%.2x)\n", ntohs(p->count), 
ntohs(p->count)); - numpeers=ntohs(p->count)/(sizeof(ntp_assoc_status_pair)); - if(p->op&REM_RESP && p->op&OP_READSTAT){ - peer=(ntp_assoc_status_pair*)p->data; - for(i=0;i<numpeers;i++){ - printf("\tpeer id %.2x status %.2x", - ntohs(peer[i].assoc), ntohs(peer[i].status)); - if (PEER_SEL(peer[i].status) >= PEER_INCLUDED){ - if(PEER_SEL(peer[i].status) >= PEER_SYNCSOURCE){ - printf(" <-- current sync source"); - } else { - printf(" <-- current sync candidate"); - } - } - printf("\n"); - } - } -} - void setup_request(ntp_message *p){ struct timeval t; @@ -411,13 +356,13 @@ double offset_request(const char *host, int *status){ ai_tmp = ai_tmp->ai_next; } - /* now do AVG_NUM checks to each host. we stop before timeout/2 seconds + /* now do AVG_NUM checks to each host. We stop before timeout/2 seconds * have passed in order to ensure post-processing and jitter time. */ now_time=start_ts=time(NULL); while(servers_completed<num_hosts && now_time-start_ts <= socket_timeout/2){ /* loop through each server and find each one which hasn't * been touched in the past second or so and is still lacking - * some responses. for each of these servers, send a new request, + * some responses. For each of these servers, send a new request, * and update the "waiting" timestamp with the current time. */ one_written=0; now_time=time(NULL); @@ -486,10 +431,7 @@ double offset_request(const char *host, int *status){ } /* cleanup */ - /* FIXME: Not closing the socket to avoid re-use of the local port - * which can cause old NTP packets to be read instead of NTP control - * pactets in jitter_request(). THERE MUST BE ANOTHER WAY... 
- * for(j=0; j<num_hosts; j++){ close(socklist[j]); } */ + for(j=0; j<num_hosts; j++){ close(socklist[j]); } free(socklist); free(ufds); free(servers); @@ -500,155 +442,6 @@ double offset_request(const char *host, int *status){ return avg_offset; } -void -setup_control_request(ntp_control_message *p, uint8_t opcode, uint16_t seq){ - memset(p, 0, sizeof(ntp_control_message)); - LI_SET(p->flags, LI_NOWARNING); - VN_SET(p->flags, VN_RESERVED); - MODE_SET(p->flags, MODE_CONTROLMSG); - OP_SET(p->op, opcode); - p->seq = htons(seq); - /* Remaining fields are zero for requests */ -} - -/* XXX handle responses with the error bit set */ -double jitter_request(const char *host, int *status){ - int conn=-1, i, npeers=0, num_candidates=0, syncsource_found=0; - int run=0, min_peer_sel=PEER_INCLUDED, num_selected=0, num_valid=0; - int peers_size=0, peer_offset=0; - ntp_assoc_status_pair *peers=NULL; - ntp_control_message req; - const char *getvar = "jitter"; - double rval = 0.0, jitter = -1.0; - char *startofvalue=NULL, *nptr=NULL; - void *tmp; - - /* Long-winded explanation: - * Getting the jitter requires a number of steps: - * 1) Send a READSTAT request. - * 2) Interpret the READSTAT reply - * a) The data section contains a list of peer identifiers (16 bits) - * and associated status words (16 bits) - * b) We want the value of 0x06 in the SEL (peer selection) value, - * which means "current synchronizatin source". If that's missing, - * we take anything better than 0x04 (see the rfc for details) but - * set a minimum of warning. - * 3) Send a READVAR request for information on each peer identified - * in 2b greater than the minimum selection value. - * 4) Extract the jitter value from the data[] (it's ASCII) - */ - my_udp_connect(server_address, 123, &conn); - - /* keep sending requests until the server stops setting the - * REM_MORE bit, though usually this is only 1 packet. 
*/ - do{ - setup_control_request(&req, OP_READSTAT, 1); - DBG(printf("sending READSTAT request")); - write(conn, &req, SIZEOF_NTPCM(req)); - DBG(print_ntp_control_message(&req)); - /* Attempt to read the largest size packet possible */ - req.count=htons(MAX_CM_SIZE); - DBG(printf("recieving READSTAT response")) - read(conn, &req, SIZEOF_NTPCM(req)); - DBG(print_ntp_control_message(&req)); - /* Each peer identifier is 4 bytes in the data section, which - * we represent as a ntp_assoc_status_pair datatype. - */ - peers_size+=ntohs(req.count); - if((tmp=realloc(peers, peers_size)) == NULL) - free(peers), die(STATE_UNKNOWN, "can not (re)allocate 'peers' buffer\n"); - peers=tmp; - memcpy((void*)((ptrdiff_t)peers+peer_offset), (void*)req.data, ntohs(req.count)); - npeers=peers_size/sizeof(ntp_assoc_status_pair); - peer_offset+=ntohs(req.count); - } while(req.op&REM_MORE); - - /* first, let's find out if we have a sync source, or if there are - * at least some candidates. in the case of the latter we'll issue - * a warning but go ahead with the check on them. */ - for (i = 0; i < npeers; i++){ - if (PEER_SEL(peers[i].status) >= PEER_INCLUDED){ - num_candidates++; - if(PEER_SEL(peers[i].status) >= PEER_SYNCSOURCE){ - syncsource_found=1; - min_peer_sel=PEER_SYNCSOURCE; - } - } - } - if(verbose) printf("%d candiate peers available\n", num_candidates); - if(verbose && syncsource_found) printf("synchronization source found\n"); - if(! 
syncsource_found){ - *status = STATE_UNKNOWN; - if(verbose) printf("warning: no synchronization source found\n"); - } - - - for (run=0; run<AVG_NUM; run++){ - if(verbose) printf("jitter run %d of %d\n", run+1, AVG_NUM); - for (i = 0; i < npeers; i++){ - /* Only query this server if it is the current sync source */ - if (PEER_SEL(peers[i].status) >= min_peer_sel){ - num_selected++; - setup_control_request(&req, OP_READVAR, 2); - req.assoc = peers[i].assoc; - /* By spec, putting the variable name "jitter" in the request - * should cause the server to provide _only_ the jitter value. - * thus reducing net traffic, guaranteeing us only a single - * datagram in reply, and making intepretation much simpler - */ - /* Older servers doesn't know what jitter is, so if we get an - * error on the first pass we redo it with "dispersion" */ - strncpy(req.data, getvar, MAX_CM_SIZE-1); - req.count = htons(strlen(getvar)); - DBG(printf("sending READVAR request...\n")); - write(conn, &req, SIZEOF_NTPCM(req)); - DBG(print_ntp_control_message(&req)); - - req.count = htons(MAX_CM_SIZE); - DBG(printf("recieving READVAR response...\n")); - read(conn, &req, SIZEOF_NTPCM(req)); - DBG(print_ntp_control_message(&req)); - - if(req.op&REM_ERROR && strstr(getvar, "jitter")) { - if(verbose) printf("The 'jitter' command failed (old ntp server?)\nRestarting with 'dispersion'...\n"); - getvar = "dispersion"; - num_selected--; - i--; - continue; - } - - /* get to the float value */ - if(verbose) { - printf("parsing jitter from peer %.2x: ", ntohs(peers[i].assoc)); - } - startofvalue = strchr(req.data, '='); - if(startofvalue != NULL) { - startofvalue++; - jitter = strtod(startofvalue, &nptr); - } - if(startofvalue == NULL || startofvalue==nptr){ - printf("warning: unable to read server jitter response.\n"); - *status = STATE_UNKNOWN; - } else { - if(verbose) printf("%g\n", jitter); - num_valid++; - rval += jitter; - } - } - } - if(verbose){ - printf("jitter parsed from %d/%d peers\n", num_valid, 
num_selected); - } - } - - rval = num_valid ? rval / num_valid : -1.0; - - close(conn); - if(peers!=NULL) free(peers); - /* If we return -1.0, it means no synchronization source was found */ - return rval; -} - int process_arguments(int argc, char **argv){ int c; int option=0; @@ -658,10 +451,9 @@ int process_arguments(int argc, char **argv){ {"verbose", no_argument, 0, 'v'}, {"use-ipv4", no_argument, 0, '4'}, {"use-ipv6", no_argument, 0, '6'}, + {"quiet", no_argument, 0, 'q'}, {"warning", required_argument, 0, 'w'}, {"critical", required_argument, 0, 'c'}, - {"jwarn", required_argument, 0, 'j'}, - {"jcrit", required_argument, 0, 'k'}, {"timeout", required_argument, 0, 't'}, {"hostname", required_argument, 0, 'H'}, {0, 0, 0, 0} @@ -672,7 +464,7 @@ int process_arguments(int argc, char **argv){ usage ("\n"); while (1) { - c = getopt_long (argc, argv, "Vhv46w:c:j:k:t:H:", longopts, &option); + c = getopt_long (argc, argv, "Vhv46qw:c:t:H:", longopts, &option); if (c == -1 || c == EOF || c == 1) break; @@ -688,22 +480,15 @@ int process_arguments(int argc, char **argv){ case 'v': verbose++; break; + case 'q': + quiet = 1; + break; case 'w': - do_offset=1; owarn = optarg; break; case 'c': - do_offset=1; ocrit = optarg; break; - case 'j': - do_jitter=1; - jwarn = optarg; - break; - case 'k': - do_jitter=1; - jcrit = optarg; - break; case 'H': if(is_host(optarg) == FALSE) usage2(_("Invalid hostname/address"), optarg); @@ -744,26 +529,17 @@ char *perfd_offset (double offset) FALSE, 0, FALSE, 0); } -char *perfd_jitter (double jitter) -{ - return fperfdata ("jitter", jitter, "s", - do_jitter, jitter_thresholds->warning->end, - do_jitter, jitter_thresholds->critical->end, - TRUE, 0, FALSE, 0); -} - int main(int argc, char *argv[]){ - int result, offset_result, jitter_result; - double offset=0, jitter=0; + int result, offset_result; + double offset=0; char *result_line, *perfdata_line; - result = offset_result = jitter_result = STATE_OK; + result = offset_result = STATE_OK; if 
(process_arguments (argc, argv) == ERROR) usage4 (_("Could not parse arguments")); set_thresholds(&offset_thresholds, owarn, ocrit); - set_thresholds(&jitter_thresholds, jwarn, jcrit); /* initialize alarm signal handling */ signal (SIGALRM, socket_timeout_alarm_handler); @@ -772,29 +548,12 @@ int main(int argc, char *argv[]){ alarm (socket_timeout); offset = offset_request(server_address, &offset_result); - /* check_ntp used to always return CRITICAL if offset_result == STATE_UNKNOWN. - * Now we'll only do that is the offset thresholds were set */ - if (do_offset && offset_result == STATE_UNKNOWN) { - result = STATE_CRITICAL; + if (offset_result == STATE_UNKNOWN) { + result = (quiet == 1 ? STATE_UNKNOWN : STATE_CRITICAL); } else { result = get_status(fabs(offset), offset_thresholds); } - /* If not told to check the jitter, we don't even send packets. - * jitter is checked using NTP control packets, which not all - * servers recognize. Trying to check the jitter on OpenNTPD - * (for example) will result in an error - */ - if(do_jitter){ - jitter=jitter_request(server_address, &jitter_result); - result = max_state_alt(result, get_status(jitter, jitter_thresholds)); - /* -1 indicates that we couldn't calculate the jitter - * Only overrides STATE_OK from the offset */ - if(jitter == -1.0 && result == STATE_OK) - result = STATE_UNKNOWN; - } - result = max_state_alt(result, jitter_result); - switch (result) { case STATE_CRITICAL : asprintf(&result_line, "NTP CRITICAL:"); @@ -816,55 +575,49 @@ int main(int argc, char *argv[]){ asprintf(&result_line, "%s Offset %.10g secs", result_line, offset); asprintf(&perfdata_line, "%s", perfd_offset(offset)); } - if (do_jitter) { - asprintf(&result_line, "%s, jitter=%f", result_line, jitter); - asprintf(&perfdata_line, "%s %s", perfdata_line, perfd_jitter(jitter)); - } printf("%s|%s\n", result_line, perfdata_line); if(server_address!=NULL) free(server_address); return result; } - - void print_help(void){ print_revision(progname, 
revision); printf ("Copyright (c) 2006 Sean Finney\n"); printf (COPYRIGHT, copyright, email); - - printf ("%s\n", _("This plugin checks the selected ntp server")); - printf ("\n\n"); - + printf ("%s\n", _("This plugin checks the clock offset with the ntp server")); + + printf ("\n\n"); + print_usage(); printf (_(UT_HELP_VRSN)); printf (_(UT_HOST_PORT), 'p', "123"); + printf (" %s\n", "-q, --quiet"); + printf (" %s\n", _("Returns UNKNOWN instead of CRITICAL if offset cannot be found")); printf (" %s\n", "-w, --warning=THRESHOLD"); printf (" %s\n", _("Offset to result in warning status (seconds)")); printf (" %s\n", "-c, --critical=THRESHOLD"); printf (" %s\n", _("Offset to result in critical status (seconds)")); - printf (" %s\n", "-j, --warning=THRESHOLD"); - printf (" %s\n", _("Warning threshold for jitter")); - printf (" %s\n", "-k, --critical=THRESHOLD"); - printf (" %s\n", _("Critical threshold for jitter")); printf (_(UT_TIMEOUT), DEFAULT_SOCKET_TIMEOUT); printf (_(UT_VERBOSE)); printf("\n"); printf("%s\n", _("Notes:")); + printf(" %s\n", _("This plugin checks the clock offset between the local host and a")); + printf(" %s\n", _("remote NTP server. 
It is independent of any commandline programs or")); + printf(" %s\n\n", _("external libraries.")); + printf(" %s\n", _("If you'd rather want to monitor an NTP server, please use")); + printf(" %s\n\n", _("check_ntp_peer.")); + printf(" %s\n", _("See:")); printf(" %s\n", ("http://nagiosplug.sourceforge.net/developer-guidelines.html#THRESHOLDFORMAT")); printf(" %s\n", _("for THRESHOLD format and examples.")); printf("\n"); printf("%s\n", _("Examples:")); - printf(" %s\n", _("Normal offset check:")); - printf(" %s\n", ("./check_ntp -H ntpserv -w 0.5 -c 1")); - printf(" %s\n", _("Check jitter too, avoiding critical notifications if jitter isn't available")); - printf(" %s\n", _("(See Notes above for more details on thresholds formats):")); - printf(" %s\n", ("./check_ntp -H ntpserv -w 0.5 -c 1 -j -1:100 -k -1:200")); + printf(" %s\n", ("./check_ntp_time -H ntpserv -w 0.5 -c 1")); printf (_(UT_SUPPORT)); } @@ -872,6 +625,8 @@ void print_help(void){ void print_usage(void) { - printf (_("Usage:")); - printf(" %s -H <host> [-w <warn>] [-c <crit>] [-j <warn>] [-k <crit>] [-v verbose]\n", progname); + printf (_("Usage:")); + printf(" %s -H <host> [-w <warn>] [-c <crit>] [-W <warn>] [-C <crit>]\n", progname); + printf(" [-j <warn>] [-k <crit>] [-v verbose]\n"); } + diff --git a/plugins/t/check_ntp.t b/plugins/t/check_ntp.t index 6e222a38..ae7f0369 100644 --- a/plugins/t/check_ntp.t +++ b/plugins/t/check_ntp.t @@ -9,7 +9,10 @@ use strict; use Test::More; use NPTest; -plan tests => 4; +my @PLUGINS1 = ('check_ntp', 'check_ntp_peer', 'check_ntp_time'); +my @PLUGINS2 = ('check_ntp_peer'); + +plan tests => (12 * scalar(@PLUGINS1)) + (6 * scalar(@PLUGINS2)); my $res; @@ -25,33 +28,84 @@ my $host_nonresponsive = getTestParameter( "NP_HOST_NONRESPONSIVE", "The hostname of system not responsive to network requests", "10.0.0.1" ); -my $hostname_invalid = getTestParameter( "NP_HOSTNAME_INVALID", +my $hostname_invalid = getTestParameter( "NP_HOSTNAME_INVALID", "An invalid (not known to 
DNS) hostname", "nosuchhost"); -SKIP: { - skip "No NTP server defined", 1 unless $ntp_service; +my $ntp_okmatch1 = '/^NTP\sOK:\sOffset\s-?[0-9]+(\.[0-9]+)?(e-[0-9]{2})?\ssecs/'; +my $ntp_warnmatch1 = '/^NTP\sWARNING:\sOffset\s-?[0-9]+(\.[0-9]+)?(e-[0-9]{2})?\ssecs/'; +my $ntp_critmatch1 = '/^NTP\sCRITICAL:\sOffset\s-?[0-9]+(\.[0-9]+)?(e-[0-9]{2})?\ssecs/'; +my $ntp_okmatch2 = '/^NTP\sOK:\sOffset\s-?[0-9]+(\.[0-9]+)?(e-[0-9]{2})?\ssecs,\sjitter=[0-9]+\.[0-9]+,\sstratum=[0-9]{1,2}/'; +my $ntp_warnmatch2 = '/^NTP\sWARNING:\sOffset\s-?[0-9]+(\.[0-9]+)?(e-[0-9]{2})?\ssecs,\sjitter=[0-9]+\.[0-9]+,\sstratum=[0-9]{1,2}/'; +my $ntp_critmatch2 = '/^NTP\sCRITICAL:\sOffset\s-?[0-9]+(\.[0-9]+)?(e-[0-9]{2})?\ssecs,\sjitter=[0-9]+\.[0-9]+,\sstratum=[0-9]{1,2}/'; +my $ntp_noresponse = '/^(CRITICAL - Socket timeout after 3 seconds)|(NTP CRITICAL: No response from NTP server)$/'; +my $ntp_nosuchhost = '/^check_ntp.*: Invalid hostname/address - ' . $hostname_invalid . '/'; + + +foreach my $plugin (@PLUGINS1) { + SKIP: { + skip "No NTP server defined", 1 unless $ntp_service; + $res = NPTest->testCmd( + "./$plugin -H $ntp_service -w 1000 -c 2000" + ); + cmp_ok( $res->return_code, '==', 0, "$plugin: Good NTP result (simple check)" ); + like( $res->output, $ntp_okmatch1, "$plugin: Output match OK (simple check)" ); + + $res = NPTest->testCmd( + "./$plugin -H $ntp_service -w 1000: -c 2000" + ); + cmp_ok( $res->return_code, '==', 1, "$plugin: Warning NTP result (simple check)" ); + like( $res->output, $ntp_warnmatch1, "$plugin: Output match WARNING (simple check)" ); + + $res = NPTest->testCmd( + "./$plugin -H $ntp_service -w 1000 -c 2000:" + ); + cmp_ok( $res->return_code, '==', 2, "$plugin: Critical NTP result (simple check)" ); + like( $res->output, $ntp_critmatch1, "$plugin: Output match CRITICAL (simple check)" ); + } + + SKIP: { + skip "No bad NTP server defined", 1 unless $no_ntp_service; + $res = NPTest->testCmd( + "./$plugin -H $no_ntp_service -t 3" + ); + cmp_ok( 
$res->return_code, '==', 2, "$plugin: No NTP service" ); + like( $res->output, $ntp_noresponse, "$plugin: Output match no NTP service" ); + } + $res = NPTest->testCmd( - "./check_ntp -H $ntp_service" + "./$plugin -H $host_nonresponsive -t 3" ); - cmp_ok( $res->return_code, '==', 0, "Got good NTP result"); -} + cmp_ok( $res->return_code, '==', 2, "$plugin: Server not responding" ); + like( $res->output, $ntp_noresponse, "$plugin: Output match non-responsive" ); -SKIP: { - skip "No bad NTP server defined", 1 unless $no_ntp_service; $res = NPTest->testCmd( - "./check_ntp -H $no_ntp_service" + "./$plugin -H $hostname_invalid" ); - cmp_ok( $res->return_code, '==', 2, "Got bad NTP result"); + cmp_ok( $res->return_code, '==', 3, "$plugin: Invalid hostname/address" ); + like( $res->output, $ntp_nosuchhost, "$plugin: Output match invalid hostname/address" ); + } -$res = NPTest->testCmd( - "./check_ntp -H $host_nonresponsive" - ); -cmp_ok( $res->return_code, '==', 2, "Got critical if server not responding"); +foreach my $plugin (@PLUGINS2) { + SKIP: { + skip "No NTP server defined", 1 unless $ntp_service; + $res = NPTest->testCmd( + "./$plugin -H $ntp_service -w 1000 -c 2000 -W 20 -C 21 -j 100000 -k 200000" + ); + cmp_ok( $res->return_code, '==', 0, "$plugin: Good NTP result with jitter and stratum check" ); + like( $res->output, $ntp_okmatch2, "$plugin: Output match OK with jitter and stratum" ); -$res = NPTest->testCmd( - "./check_ntp -H $hostname_invalid" - ); -cmp_ok( $res->return_code, '==', 3, "Got critical if server hostname invalid"); + $res = NPTest->testCmd( + "./$plugin -H $ntp_service -w 1000 -c 2000 -W ~:-1 -C 21 -j 100000 -k 200000" + ); + cmp_ok( $res->return_code, '==', 1, "$plugin: Warning NTP result with jitter and stratum check" ); + like( $res->output, $ntp_warnmatch2, "$plugin: Output match WARNING with jitter and stratum" ); + $res = NPTest->testCmd( + "./$plugin -H $ntp_service -w 1000 -c 2000 -W 20 -C 21 -j 100000 -k ~:-1" + ); + cmp_ok( 
$res->return_code, '==', 2, "$plugin: Critical NTP result with jitter and stratum check" ); + like( $res->output, $ntp_critmatch2, "$plugin: Output match CRITICAL with jitter and stratum" ); + } +} |