Add sm-notify from SuSE
author Neil Brown <neilb@suse.de>
Mon, 19 Mar 2007 22:36:28 +0000 (09:36 +1100)
committer Neil Brown <neilb@suse.de>
Mon, 19 Mar 2007 22:36:28 +0000 (09:36 +1100)
Not included in build yet.

utils/statd/sm-notify.8 [new file with mode: 0644]
utils/statd/sm-notify.c [new file with mode: 0644]

diff --git a/utils/statd/sm-notify.8 b/utils/statd/sm-notify.8
new file mode 100644 (file)
index 0000000..29726d6
--- /dev/null
@@ -0,0 +1,114 @@
+.\"
+.\" sm-notify(8)
+.\"
+.\" Copyright (C) 2004 Olaf Kirch <okir@suse.de>
+.TH sm-notify 8 "8 Mar 2004"
+.SH NAME
+sm-notify \- Send out NSM reboot notifications
+.SH SYNOPSIS
+.BI "/sbin/sm-notify [-dnq] [-m " time "] [-v " ipaddr "] [-p " port "]"
+.SH DESCRIPTION
+File locking over NFS requires a facility to notify peers in
+case of a reboot, so that clients can reclaim locks after
+a server crash, and/or
+servers can release locks held by the rebooted client.
+.PP
+This is a two-step process: during normal
+operations, a mechanism is required to keep track of which
+hosts need to be informed of a reboot. And of course,
+notifications need to be sent out during reboot.
+The protocol used for this is called NSM, for
+.IR "Network Status Monitor" .
+.PP
+Commonly, these two features are provided by the
+.B rpc.statd
+daemon.
+The SuSE Linux Kernel includes a kernel level implementation
+of
+.BR statd ", "
+which keeps track of the hosts that need notifications, and
+accepts reboot notifications.
+.PP
+This is complemented by the
+.B sm-notify
+application, which is responsible for sending out the reboot
+notifications.
+.SS Operation
+For each NFS client or server machine to be monitored,
+the kernel level
+.B statd
+creates a file in
+.BR /var/lib/nfs/sm ", "
+and removes the file if monitoring is no longer required.
+.PP
+When the machine is rebooted,
+.B sm-notify
+iterates through these files and notifies the peer
+.B statd
+server on those machines.
+.PP
+Each machine has an
+.I "NSM state" ,
+which is basically an integer counter that is incremented
+each time the machine reboots. This counter is stored
+in
+.BR /var/lib/nfs/state ,
+and updated by
+.BR sm-notify .
+.SH OPTIONS
+.TP
+.BI -m " failtime"
+When notifying hosts,
+.B sm-notify
+will try to contact each host for up to 15 minutes,
+and will give up if unable to reach it within this time
+frame.
+.IP
+Using the
+.B -m
+option, you can override this. A value of 0 tells
+sm-notify to retry indefinitely; any other value is
+interpreted as the maximum retry time in minutes.
+.TP
+.BI -v " ipaddr"
+This option tells
+.B sm-notify
+to bind to the specified
+.IR ipaddr ,
+so that all notification packets originate from this address.
+This is useful for NFS failover.
+.TP
+.BI -p " port"
+instructs
+.B sm-notify
+to bind to the indicated IP
+.IR port
+number. If this option is not given, it will try to bind to
+a randomly chosen privileged port below 1024.
+.TP
+.B -q
+Be quiet. This suppresses all messages except error
+messages while collecting the list of hosts.
+.TP
+.B -n
+Do not update the NSM state. This is for testing only.
+.TP
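+.B -d
+Enables debugging.
+With this option,
+.B sm-notify
+stays in the foreground and writes its messages to standard error.
+By default,
+.B sm-notify
+forks and puts itself in the background after obtaining the
+list of hosts from
+.BR /var/lib/nfs/sm .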
+.SH FILES
+.BR /var/lib/nfs/state
+.br
+.BR /var/lib/nfs/sm/*
+.br
+.BR /var/lib/nfs/sm.bak/*
+.SH SEE ALSO
+.BR rpc.statd (8),
+.BR rpc.nfsd (8),
+.BR portmap (8)
+.SH AUTHORS
+.br
+Olaf Kirch <okir@suse.de>
diff --git a/utils/statd/sm-notify.c b/utils/statd/sm-notify.c
new file mode 100644 (file)
index 0000000..7af0cea
--- /dev/null
@@ -0,0 +1,678 @@
+/*
+ * Send NSM notify calls to all hosts listed in /var/lib/nfs/sm
+ *
+ * Copyright (C) 2004-2006 Olaf Kirch <okir@suse.de>
+ */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/poll.h>
+#include <sys/param.h>
+#include <sys/syslog.h>
+#include <arpa/inet.h>
+#include <dirent.h>
+#include <time.h>
+#include <stdio.h>
+#include <getopt.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdarg.h>
+#include <netdb.h>
+#include <errno.h>
+
+#ifndef BASEDIR        /* may be predefined by the build to relocate the state directory */
+#define BASEDIR                "/var/lib/nfs"
+#endif
+
+#define _SM_STATE_PATH BASEDIR "/state"        /* file holding the NSM state counter */
+#define        _SM_DIR_PATH    BASEDIR "/sm"   /* one file per monitored peer */
+#define        _SM_BAK_PATH    _SM_DIR_PATH ".bak"     /* peers being notified by this run */
+
+#define NSM_PROG       100024  /* same value as NSM_PROGRAM; not referenced in this file */
+#define NSM_PROGRAM    100024  /* RPC program number put into calls and portmap queries */
+#define NSM_VERSION    1       /* RPC program version put into calls and portmap queries */
+#define NSM_TIMEOUT    2       /* initial per-host retransmit interval, in seconds */
+#define NSM_NOTIFY     6       /* RPC procedure number used for SM_NOTIFY */
+#define NSM_MAX_TIMEOUT        120     /* don't make this too big */
+#define MAXMSGSIZE     256     /* message buffer size, counted in uint32_t words */
+
+typedef struct sockaddr_storage nsm_address;   /* holds either an IPv4 or an IPv6 socket address */
+
+struct nsm_host {      /* one peer that still has to be notified */
+       struct nsm_host *       next;           /* next entry in the send queue */
+       char *                  name;           /* copy of the monitor file name (address or host name) */
+       char *                  path;           /* full path of the monitor file; unlinked once notified */
+       nsm_address             addr;           /* peer address; port 0 = statd port not known yet */
+       time_t                  last_used;      /* mtime of the monitor file */
+       time_t                  send_next;      /* earliest time of the next transmission */
+       unsigned int            timeout;        /* current retransmit interval (s), doubled up to NSM_MAX_TIMEOUT */
+       unsigned int            retries;        /* transmissions since the port was last reset */
+       unsigned int            xid;            /* RPC XID of the outstanding call; 0 = none assigned */
+};
+
+static char            nsm_hostname[256];      /* our name as sent in SM_NOTIFY (-v argument or gethostname()) */
+static uint32_t                nsm_state;      /* NSM state number sent in SM_NOTIFY */
+static int             opt_debug = 0;          /* -d: stay in foreground, emit LOG_DEBUG messages */
+static int             opt_quiet = 0;          /* -q */
+static int             opt_update_state = 1;   /* cleared by -n */
+static unsigned int    opt_max_retry = 15 * 60;        /* retry limit in seconds; 0 = no limit */
+static char *          opt_srcaddr = 0;        /* -v: source address or host name */
+static uint16_t                opt_srcport = 0;        /* -p: source port; 0 = use bindresvport() */
+static int             log_syslog = 0;         /* nonzero: nsm_log() writes to syslog instead of stderr */
+
+static unsigned int    nsm_get_state(int);
+static void            notify(void);
+static void            notify_host(int, struct nsm_host *);
+static void            recv_reply(int);
+static void            backup_hosts(const char *, const char *);
+static void            get_hosts(const char *);
+static void            insert_host(struct nsm_host *);
+struct nsm_host *      find_host(uint32_t);
+static int             addr_parse(int, const char *, nsm_address *);
+static int             addr_get_port(nsm_address *);
+static void            addr_set_port(nsm_address *, int);
+static int             host_lookup(int, const char *, nsm_address *);
+void                   nsm_log(int fac, const char *fmt, ...);
+
+static struct nsm_host *       hosts = NULL;   /* send queue, kept sorted by insert_host() */
+
+/*
+ * Entry point: parse the options, move the monitor files to the backup
+ * directory and read them, go to the background (unless debugging),
+ * fetch/update the NSM state and notify all peers.
+ *
+ *   -d        debug: stay in the foreground, log to stderr
+ *   -m min    maximum retry time in minutes (0 = retry forever)
+ *   -n        do not update the NSM state
+ *   -p port   source port to bind to
+ *   -v addr   source address or host name; also used as our NSM name
+ *   -q        quiet
+ *
+ * Returns 0 when no host is left on the queue, 1 on usage or setup
+ * errors or when some hosts could not be notified.
+ */
+int
+main(int argc, char **argv)
+{
+       char    *end;
+       long    val;
+       int     c;
+
+       while ((c = getopt(argc, argv, "dm:np:v:q")) != -1) {
+               switch (c) {
+               case 'd':
+                       opt_debug++;
+                       break;
+               case 'm':
+                       /* Minutes on the command line, seconds internally.
+                        * Reject junk, negative values and values whose
+                        * conversion to seconds would not fit. */
+                       errno = 0;
+                       val = strtol(optarg, &end, 10);
+                       if (errno || end == optarg || *end != '\0'
+                        || val < 0
+                        || val > (long) ((unsigned int) -1 / 60))
+                               goto usage;
+                       opt_max_retry = (unsigned int) val * 60;
+                       break;
+               case 'n':
+                       opt_update_state = 0;
+                       break;
+               case 'p':
+                       errno = 0;
+                       val = strtol(optarg, &end, 10);
+                       if (errno || end == optarg || *end != '\0'
+                        || val < 0 || val > 65535)
+                               goto usage;
+                       opt_srcport = (uint16_t) val;
+                       break;
+               case 'v':
+                       opt_srcaddr = optarg;
+                       break;
+               case 'q':
+                       opt_quiet = 1;
+                       break;
+               default:
+                       goto usage;
+               }
+       }
+
+       if (optind < argc) {
+usage:         fprintf(stderr,
+                       "Usage: sm-notify [-dnq] [-m minutes] "
+                       "[-v address] [-p port]\n");
+               return 1;
+       }
+
+       if (opt_srcaddr) {
+               /* nsm_hostname is static (zero-filled), so copying at
+                * most size-1 bytes leaves it NUL-terminated */
+               strncpy(nsm_hostname, opt_srcaddr, sizeof(nsm_hostname)-1);
+       } else
+       if (gethostname(nsm_hostname, sizeof(nsm_hostname)) < 0) {
+               perror("gethostname");
+               return 1;
+       }
+
+       backup_hosts(_SM_DIR_PATH, _SM_BAK_PATH);
+       get_hosts(_SM_BAK_PATH);
+
+       if (!opt_debug) {
+               if (!opt_quiet)
+                       printf("Backgrounding to notify hosts...\n");
+
+               openlog("sm-notify", LOG_PID, LOG_DAEMON);
+               log_syslog = 1;
+
+               /* daemon(0, 0) already redirects stdin, stdout and stderr
+                * to /dev/null. They are deliberately left open: closing
+                * them would let later open()/socket() calls reuse
+                * descriptors 0-2. */
+               if (daemon(0, 0) < 0) {
+                       nsm_log(LOG_WARNING, "unable to background: %s",
+                                       strerror(errno));
+                       return 1;
+               }
+       }
+
+       /* Get and update the NSM state. This will call sync() */
+       nsm_state = nsm_get_state(opt_update_state);
+
+       notify();
+
+       /* Whatever is still queued could not be notified in time */
+       if (hosts) {
+               struct nsm_host *hp;
+
+               while ((hp = hosts) != 0) {
+                       hosts = hp->next;
+                       nsm_log(LOG_NOTICE,
+                               "Unable to notify %s, giving up",
+                               hp->name);
+               }
+               return 1;
+       }
+
+       return 0;
+}
+
+/*
+ * Notify hosts
+ *
+ * Creates the non-blocking IPv4 UDP socket, binds it as requested by
+ * -v/-p (or to a reserved port via bindresvport), then runs the
+ * send/receive loop until the host queue is empty or the -m retry
+ * limit has expired. Hosts still queued on return were not notified.
+ *
+ * Setup failures are fatal (exit(1)). They are reported through
+ * nsm_log() rather than perror() so that they reach syslog once the
+ * process has been backgrounded.
+ */
+void
+notify(void)
+{
+       nsm_address local_addr;
+       time_t  failtime = 0;
+       int     sock;
+
+       sock = socket(AF_INET, SOCK_DGRAM, 0);
+       if (sock < 0) {
+               nsm_log(LOG_WARNING, "socket: %s", strerror(errno));
+               exit(1);
+       }
+       fcntl(sock, F_SETFL, O_NONBLOCK);
+
+       memset(&local_addr, 0, sizeof(local_addr));
+       local_addr.ss_family = AF_INET; /* Default to IPv4 */
+
+       /* Bind source IP if provided on command line */
+       if (opt_srcaddr) {
+               if (!addr_parse(AF_INET, opt_srcaddr, &local_addr)
+                && !host_lookup(AF_INET, opt_srcaddr, &local_addr)) {
+                       /* nsm_log() appends the newline itself */
+                       nsm_log(LOG_WARNING,
+                               "Not a valid hostname or address: \"%s\"",
+                               opt_srcaddr);
+                       exit(1);
+               }
+               /* We know it's IPv4 at this point */
+       }
+
+       /* Use source port if provided on the command line,
+        * otherwise use bindresvport */
+       if (opt_srcport) {
+               addr_set_port(&local_addr, opt_srcport);
+               /* Pass the length of the IPv4 address actually stored,
+                * not that of the enclosing sockaddr_storage */
+               if (bind(sock, (struct sockaddr *) &local_addr,
+                               sizeof(struct sockaddr_in)) < 0) {
+                       nsm_log(LOG_WARNING, "bind: %s", strerror(errno));
+                       exit(1);
+               }
+       } else if (bindresvport(sock, (struct sockaddr_in *) &local_addr) < 0) {
+               /* Not fatal: carry on with an unbound socket */
+               nsm_log(LOG_WARNING, "bindresvport: %s", strerror(errno));
+       }
+
+       if (opt_max_retry)
+               failtime = time(NULL) + opt_max_retry;
+
+       while (hosts) {
+               struct pollfd   pfd;
+               time_t          now = time(NULL);
+               unsigned int    sent = 0;
+               struct nsm_host *hp;
+               long            wait;
+
+               if (failtime && now >= failtime)
+                       break;
+
+               /* Transmit to every host at the head of the queue that
+                * is due. Each one is re-queued right away, so the queue
+                * never becomes empty inside this loop. */
+               while ((wait = hosts->send_next - now) <= 0) {
+                       /* Never send more than 10 packets at once */
+                       if (sent++ >= 10)
+                               break;
+
+                       /* Remove queue head */
+                       hp = hosts;
+                       hosts = hp->next;
+
+                       notify_host(sock, hp);
+
+                       /* Set the timeout for this call, using an
+                          exponential timeout strategy */
+                       wait = hp->timeout;
+                       if ((hp->timeout <<= 1) > NSM_MAX_TIMEOUT)
+                               hp->timeout = NSM_MAX_TIMEOUT;
+                       hp->send_next = now + wait;
+                       hp->retries++;
+
+                       insert_host(hp);
+               }
+
+               nsm_log(LOG_DEBUG, "Host %s due in %ld seconds",
+                               hosts->name, wait);
+
+               pfd.fd = sock;
+               pfd.events = POLLIN;
+
+               /* poll() takes milliseconds; wait at least 100ms */
+               wait *= 1000;
+               if (wait < 100)
+                       wait = 100;
+               if (poll(&pfd, 1, wait) != 1)
+                       continue;
+
+               recv_reply(sock);
+       }
+}
+
+/*
+ * Send notification to a single host
+ *
+ * Builds one RPC call by hand and sends it over sock. While the port
+ * in host->addr is 0, the call is a portmap GETPORT query for the NSM
+ * program (sent to port 111); once the port is known, it is the
+ * SM_NOTIFY call itself, carrying nsm_hostname and nsm_state.
+ *
+ * Transmission is best effort: a failed sendto() is only logged at
+ * debug level, since the caller re-queues the host and retries.
+ */
+void
+notify_host(int sock, struct nsm_host *host)
+{
+       static unsigned int     xid = 0;
+       nsm_address             dest;
+       uint32_t                msgbuf[MAXMSGSIZE], *p;
+       unsigned int            len;
+
+       /* Seed the XID counter on first use; a host keeps its XID
+        * across retransmissions until recv_reply() clears it */
+       if (!xid)
+               xid = getpid() + time(NULL);
+       if (!host->xid)
+               host->xid = xid++;
+
+       /* Common RPC call header: xid, message type 0 (CALL),
+        * RPC version 2 */
+       memset(msgbuf, 0, sizeof(msgbuf));
+       p = msgbuf;
+       *p++ = htonl(host->xid);
+       *p++ = 0;
+       *p++ = htonl(2);
+
+       /* If we retransmitted 4 times, reset the port to force
+        * a new portmap lookup (in case statd was restarted)
+        */
+       if (host->retries >= 4) {
+               addr_set_port(&host->addr, 0);
+               host->retries = 0;
+       }
+
+       dest = host->addr;
+       if (addr_get_port(&dest) == 0) {
+               /* Build a PMAP packet */
+               nsm_log(LOG_DEBUG, "Sending portmap query to %s", host->name);
+
+               /* Portmapper: program 100000, version 2, procedure 3 */
+               addr_set_port(&dest, 111);
+               *p++ = htonl(100000);
+               *p++ = htonl(2);
+               *p++ = htonl(3);
+
+               /* Auth and verf */
+               *p++ = 0; *p++ = 0;
+               *p++ = 0; *p++ = 0;
+
+               /* Arguments: program, version, protocol, port (unused) */
+               *p++ = htonl(NSM_PROGRAM);
+               *p++ = htonl(NSM_VERSION);
+               *p++ = htonl(IPPROTO_UDP);
+               *p++ = 0;
+       } else {
+               /* Build an SM_NOTIFY packet */
+               nsm_log(LOG_DEBUG, "Sending SM_NOTIFY to %s", host->name);
+
+               *p++ = htonl(NSM_PROGRAM);
+               *p++ = htonl(NSM_VERSION);
+               *p++ = htonl(NSM_NOTIFY);
+
+               /* Auth and verf */
+               *p++ = 0; *p++ = 0;
+               *p++ = 0; *p++ = 0;
+
+               /* state change: our name as a length-prefixed string,
+                * padded to a multiple of four bytes (the buffer was
+                * zeroed above), followed by the state number */
+               len = strlen(nsm_hostname);
+               *p++ = htonl(len);
+               memcpy(p, nsm_hostname, len);
+               p += (len + 3) >> 2;
+               *p++ = htonl(nsm_state);
+       }
+       len = (p - msgbuf) << 2;
+
+       if (sendto(sock, msgbuf, len, 0,
+                       (struct sockaddr *) &dest, sizeof(dest)) < 0)
+               nsm_log(LOG_DEBUG, "sendto %s failed: %s",
+                               host->name, strerror(errno));
+}
+
+/*
+ * Receive reply from remote host
+ *
+ * Reads one datagram from sock. Replies that are too short, are not
+ * accepted/successful RPC replies, or whose XID matches no queued
+ * host are dropped. Otherwise the matching host is taken off the
+ * queue and then:
+ *  - portmap reply: the returned port is stored (or, if it is 0, the
+ *    next query is delayed by NSM_MAX_TIMEOUT) and the host is
+ *    re-queued;
+ *  - SM_NOTIFY reply: the monitor file is unlinked and the host freed.
+ */
+void
+recv_reply(int sock)
+{
+       struct nsm_host *hp;
+       uint32_t        msgbuf[MAXMSGSIZE], *p, *end;
+       uint32_t        xid;
+       int             res;
+
+       res = recv(sock, msgbuf, sizeof(msgbuf), 0);
+       if (res < 0)
+               return;
+
+       nsm_log(LOG_DEBUG, "Received packet...");
+
+       /* The header checked below is six words long. Parsing anything
+        * shorter would read uninitialized parts of msgbuf. */
+       if (res < (int) (6 * sizeof(uint32_t)))
+               return;
+
+       p = msgbuf;
+       end = p + (res >> 2);
+
+       xid = ntohl(*p++);
+       if (*p++ != htonl(1)    /* must be REPLY */
+        || *p++ != htonl(0)    /* must be ACCEPTED */
+        || *p++ != htonl(0)    /* must be NULL verifier */
+        || *p++ != htonl(0)
+        || *p++ != htonl(0))   /* must be SUCCESS */
+               return;
+
+       /* Before we look at the data, find the host struct for
+          this reply */
+       if ((hp = find_host(xid)) == NULL)
+               return;
+
+       if (addr_get_port(&hp->addr) == 0) {
+               /* This was a portmap request */
+               unsigned int    port;
+
+               /* The port must be present in the packet and must be
+                * a valid 16-bit port number */
+               if (p + 1 > end)
+                       goto fail;
+               port = ntohl(*p++);
+               if (port > 65535)
+                       goto fail;
+
+               hp->send_next = time(NULL);
+               if (port == 0) {
+                       /* No binding for statd. Delay the next
+                        * portmap query for max timeout */
+                       nsm_log(LOG_DEBUG, "No statd on %s", hp->name);
+                       hp->timeout = NSM_MAX_TIMEOUT;
+                       hp->send_next += NSM_MAX_TIMEOUT;
+               } else {
+                       addr_set_port(&hp->addr, port);
+                       if (hp->timeout >= NSM_MAX_TIMEOUT / 4)
+                               hp->timeout = NSM_MAX_TIMEOUT / 4;
+               }
+               /* The next call to this host gets a fresh XID */
+               hp->xid = 0;
+       } else {
+               /* Successful NOTIFY call. Server returns void,
+                * so nothing we need to do here (except
+                * check that we didn't read past the end of the
+                * packet)
+                */
+               if (p <= end) {
+                       nsm_log(LOG_DEBUG, "Host %s notified successfully", hp->name);
+                       unlink(hp->path);
+                       free(hp->name);
+                       free(hp->path);
+                       free(hp);
+                       return;
+               }
+       }
+
+fail:  /* Re-insert the host */
+       insert_host(hp);
+}
+
+/*
+ * Move every monitor file from dirname into bakname.
+ *
+ * Entries whose name starts with '.' are left alone. A rename that
+ * fails is logged and the scan carries on. If dirname cannot be
+ * opened, the error is reported with perror() and nothing is moved.
+ */
+static void
+backup_hosts(const char *dirname, const char *bakname)
+{
+       char            from[1024], to[1024];
+       struct dirent   *entry;
+       DIR             *dp;
+
+       dp = opendir(dirname);
+       if (dp == NULL) {
+               perror(dirname);
+               return;
+       }
+
+       for (entry = readdir(dp); entry != NULL; entry = readdir(dp)) {
+               if (entry->d_name[0] == '.')
+                       continue;
+
+               snprintf(from, sizeof(from), "%s/%s", dirname, entry->d_name);
+               snprintf(to, sizeof(to), "%s/%s", bakname, entry->d_name);
+               if (rename(from, to) >= 0)
+                       continue;
+
+               nsm_log(LOG_WARNING,
+                       "Failed to rename %s -> %s: %m",
+                       from, to);
+       }
+       closedir(dp);
+}
+
+/*
+ * Get all entries from sm.bak and convert them to host names
+ *
+ * Each entry in dirname whose name does not start with '.' is taken
+ * to be an IPv4 address, an IPv6 address or a host name, tried in
+ * that order. Entries that cannot be resolved are logged and their
+ * file is removed; entries that cannot be stat()ed are skipped. Every
+ * other entry is put on the send queue with the initial timeout.
+ *
+ * Running out of memory is logged: a failed host allocation ends the
+ * scan, a failed string copy skips just that entry.
+ */
+static void
+get_hosts(const char *dirname)
+{
+       struct nsm_host *host;
+       struct dirent   *de;
+       DIR             *dir;
+
+       if (!(dir = opendir(dirname))) {
+               perror(dirname);
+               return;
+       }
+
+       host = NULL;
+       while ((de = readdir(dir)) != NULL) {
+               struct stat     stb;
+               char            path[1024];
+
+               if (de->d_name[0] == '.')
+                       continue;
+               if (host == NULL) {
+                       host = calloc(1, sizeof(*host));
+                       if (host == NULL) {
+                               nsm_log(LOG_WARNING,
+                                       "Out of memory while reading %s",
+                                       dirname);
+                               break;
+                       }
+               } else {
+                       /* Record left over from a skipped entry:
+                        * start again from a clean slate */
+                       memset(host, 0, sizeof(*host));
+               }
+
+               snprintf(path, sizeof(path), "%s/%s", dirname, de->d_name);
+               if (!addr_parse(AF_INET, de->d_name, &host->addr)
+                && !addr_parse(AF_INET6, de->d_name, &host->addr)
+                && !host_lookup(AF_INET, de->d_name, &host->addr)) {
+                       nsm_log(LOG_WARNING,
+                               "%s doesn't seem to be a valid address, skipped",
+                               de->d_name);
+                       unlink(path);
+                       continue;
+               }
+
+               if (stat(path, &stb) < 0)
+                       continue;
+               host->last_used = stb.st_mtime;
+               host->timeout = NSM_TIMEOUT;
+               host->path = strdup(path);
+               host->name = strdup(de->d_name);
+               if (host->path == NULL || host->name == NULL) {
+                       nsm_log(LOG_WARNING,
+                               "Out of memory, skipping %s", de->d_name);
+                       free(host->path);
+                       free(host->name);
+                       continue;
+               }
+
+               /* The queue owns the record from here on */
+               insert_host(host);
+               host = NULL;
+       }
+       closedir(dir);
+
+       /* Record allocated for an entry that was skipped */
+       free(host);
+}
+
+/*
+ * Insert host into sorted list
+ *
+ * The queue is ordered by ascending send_next. Among hosts with the
+ * same send_next, the one with the larger last_used goes first, so
+ * that recently used hosts are notified first.
+ */
+void
+insert_host(struct nsm_host *host)
+{
+       struct nsm_host **link;
+
+       /* Walk the links until we reach the entry that host must
+        * precede, or the end of the queue */
+       for (link = &hosts; *link != NULL; link = &(*link)->next) {
+               struct nsm_host *cur = *link;
+
+               if (host->send_next < cur->send_next)
+                       break;
+               if (host->send_next == cur->send_next
+                && host->last_used > cur->last_used)
+                       break;
+       }
+
+       host->next = *link;
+       *link = host;
+}
+
+/*
+ * Find host given the XID
+ *
+ * The first queued host whose xid equals the argument is unlinked
+ * from the queue and returned; the caller has to re-insert or free
+ * it. Returns NULL if no host matches.
+ */
+struct nsm_host *
+find_host(uint32_t xid)
+{
+       struct nsm_host **link = &hosts;
+       struct nsm_host *cur;
+
+       for (cur = *link; cur != NULL; cur = *link) {
+               if (cur->xid != xid) {
+                       link = &cur->next;
+                       continue;
+               }
+               *link = cur->next;
+               return cur;
+       }
+       return NULL;
+}
+
+
+/*
+ * Retrieve the current NSM state
+ *
+ * Reads the state number from the state file, bumping an even value
+ * to the next odd one. If the file is missing or too short, the state
+ * starts at 1 and an update is forced. When an update is due (forced,
+ * or the update argument is nonzero), the state is advanced by 2,
+ * written to "<state file>.new", renamed over the state file and
+ * sync() is called. Any failure while writing is fatal (exit(1)).
+ *
+ * Returns the state number to use in notifications.
+ */
+unsigned int
+nsm_get_state(int update)
+{
+       char    tmppath[PATH_MAX];
+       int     state;
+       int     fd;
+
+       fd = open(_SM_STATE_PATH, O_RDONLY);
+       if (fd >= 0) {
+               if (read(fd, &state, sizeof(state)) == sizeof(state)) {
+                       if ((state & 1) == 0)
+                               state += 1;
+               } else {
+                       nsm_log(LOG_WARNING,
+                               "%s: bad file size, setting state = 1",
+                               _SM_STATE_PATH);
+                       state = 1;
+                       update = 1;
+               }
+               close(fd);
+       } else {
+               if (!opt_quiet) {
+                       nsm_log(LOG_WARNING, "%s: %m", _SM_STATE_PATH);
+                       nsm_log(LOG_WARNING, "Creating %s, set initial state 1",
+                               _SM_STATE_PATH);
+               }
+               state = 1;
+               update = 1;
+       }
+
+       if (!update)
+               return state;
+
+       /* Write the new value to a temporary file first, then move it
+        * into place with rename() */
+       state += 2;
+       snprintf(tmppath, sizeof(tmppath), "%s.new", _SM_STATE_PATH);
+
+       fd = open(tmppath, O_CREAT|O_WRONLY, 0644);
+       if (fd < 0) {
+               nsm_log(LOG_WARNING, "Cannot create %s: %m", tmppath);
+               exit(1);
+       }
+       if (write(fd, &state, sizeof(state)) != sizeof(state)) {
+               nsm_log(LOG_WARNING,
+                       "Failed to write state to %s", tmppath);
+               exit(1);
+       }
+       close(fd);
+
+       if (rename(tmppath, _SM_STATE_PATH) < 0) {
+               nsm_log(LOG_WARNING,
+                       "Cannot create %s: %m", _SM_STATE_PATH);
+               exit(1);
+       }
+       sync();
+
+       return state;
+}
+
+/*
+ * Address handling utilities
+ */
+
+/*
+ * Parse name as a numeric address of family af (AF_INET or AF_INET6)
+ * into addr. On success the address family is stored in addr as well
+ * and 1 is returned. Returns 0 for any other family or if name is not
+ * a valid address of that family.
+ */
+static int
+addr_parse(int af, const char *name, nsm_address *addr)
+{
+       void    *dst;
+
+       switch (af) {
+       case AF_INET:
+               dst = &((struct sockaddr_in *) addr)->sin_addr;
+               break;
+       case AF_INET6:
+               dst = &((struct sockaddr_in6 *) addr)->sin6_addr;
+               break;
+       default:
+               return 0;
+       }
+
+       if (inet_pton(af, name, dst) > 0) {
+               ((struct sockaddr *) addr)->sa_family = af;
+               return 1;
+       }
+       return 0;
+}
+
+/*
+ * Return the port stored in addr, in host byte order.
+ * Yields 0 if the address family is neither AF_INET nor AF_INET6.
+ */
+int
+addr_get_port(nsm_address *addr)
+{
+       struct sockaddr *sa = (struct sockaddr *) addr;
+
+       if (sa->sa_family == AF_INET)
+               return ntohs(((struct sockaddr_in *) addr)->sin_port);
+       if (sa->sa_family == AF_INET6)
+               return ntohs(((struct sockaddr_in6 *) addr)->sin6_port);
+       return 0;
+}
+
+/*
+ * Store port (given in host byte order) in addr.
+ * Does nothing if the address family is neither AF_INET nor AF_INET6.
+ */
+static void
+addr_set_port(nsm_address *addr, int port)
+{
+       struct sockaddr *sa = (struct sockaddr *) addr;
+
+       if (sa->sa_family == AF_INET)
+               ((struct sockaddr_in *) addr)->sin_port = htons(port);
+       else if (sa->sa_family == AF_INET6)
+               ((struct sockaddr_in6 *) addr)->sin6_port = htons(port);
+}
+
+/*
+ * Resolve name to an address of family af using getaddrinfo() and
+ * copy the first result into addr.
+ *
+ * Returns 1 on success, 0 if the lookup fails or the result does not
+ * fit into addr.
+ */
+static int
+host_lookup(int af, const char *name, nsm_address *addr)
+{
+       struct addrinfo hints, *ai;
+       int okay = 0;
+
+       memset(&hints, 0, sizeof(hints));
+       hints.ai_family = af;
+
+       if (getaddrinfo(name, NULL, &hints, &ai) != 0)
+               return 0;
+
+       /* An address that exactly fills *addr still fits */
+       if (ai->ai_addrlen <= sizeof(*addr)) {
+               memcpy(addr, ai->ai_addr, ai->ai_addrlen);
+               okay = 1;
+       }
+
+       freeaddrinfo(ai);
+       return okay;
+}
+
+/*
+ * Log a message
+ *
+ * fac is a syslog priority and fmt a printf-style format. LOG_DEBUG
+ * messages are dropped unless debugging is enabled. The message goes
+ * to syslog when log_syslog is set, otherwise to stderr followed by
+ * a newline.
+ */
+void
+nsm_log(int fac, const char *fmt, ...)
+{
+       va_list args;
+
+       if (!opt_debug && fac == LOG_DEBUG)
+               return;
+
+       va_start(args, fmt);
+       if (!log_syslog) {
+               vfprintf(stderr, fmt, args);
+               fputs("\n", stderr);
+       } else {
+               vsyslog(fac, fmt, args);
+       }
+       va_end(args);
+}