From 33cfd406a19a38bc10a977109bd2baaed1228a79 Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Tue, 20 Mar 2007 09:36:28 +1100 Subject: [PATCH 1/1] Add sm-notify from SuSE Not included in build yet. --- utils/statd/sm-notify.8 | 114 +++++++ utils/statd/sm-notify.c | 678 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 792 insertions(+) create mode 100644 utils/statd/sm-notify.8 create mode 100644 utils/statd/sm-notify.c diff --git a/utils/statd/sm-notify.8 b/utils/statd/sm-notify.8 new file mode 100644 index 0000000..29726d6 --- /dev/null +++ b/utils/statd/sm-notify.8 @@ -0,0 +1,114 @@ +.\" +.\" sm-notify(8) +.\" +.\" Copyright (C) 2004 Olaf Kirch +.TH sm-notify 8 "8 Mar 2004" +.SH NAME +sm-notify \- Send out NSM reboot notifications +.SH SYNOPSIS +.BI "/sbin/sm-notify [-d] [-n] [-q] [-m " time "] [-p " port "] [-v " ipaddr "]" +.SH DESCRIPTION +File locking over NFS requires a facility to notify peers in +case of a reboot, so that clients can reclaim locks after +a server crash, and/or +servers can release locks held by the rebooted client. +.PP +This is a two-step process: during normal +operations, a mechanism is required to keep track of which +hosts need to be informed of a reboot. And of course, +notifications need to be sent out during reboot. +The protocol used for this is called NSM, for +.IR "Network Status Monitor" . +.PP +Commonly, these two features are provided by the +.B rpc.statd +daemon. +The SuSE Linux Kernel includes a kernel level implementation +of +.BR statd ", " +which keeps track of the hosts that need notifications, and +accepts reboot notifications. +.PP +This is complemented by the +.B sm-notify +application, which is responsible for sending out the reboot +notifications. +.SS Operation +For each NFS client or server machine to be monitored, +the kernel level +.B statd +creates a file in +.BR /var/lib/nfs/sm ", " +and removes the file if monitoring is no longer required. 
+.PP +When the machine is rebooted, +.B sm-notify +iterates through these files and notifies the peer +.B statd +server on those machines. +.PP +Each machine has an +.I "NSM state" , +which is basically an integer counter that is incremented +each time the machine reboots. This counter is stored +in +.BR /var/lib/nfs/state , +and updated by +.BR sm-notify . +.SH OPTIONS +.TP +.BI -m " failtime +When notifying hosts, +.B sm-notify +will try to contact each host for up to 15 minutes, +and will give up if unable to reach it within this time +frame. +.IP +Using the +.B -m +option, you can override this. A value of 0 tells +sm-notify to retry indefinitely; any other value is +interpreted as the maximum retry time in minutes. +.TP +.BI -v " ipaddr +This option tells +.B sm-notify +to bind to the specified +.IR ipaddr , +so that all notification packets originate from this address. +This is useful for NFS failover. +.TP +.BI -p " port +instructs +.B sm-notify +to bind to the indicated IP +.IR port +number. If this option is not given, it will try to bind to +a randomly chosen privileged port below 1024. +.TP +.B -q +Be quiet. This suppresses all messages except error +messages while collecting the list of hosts. +.TP +.B -n +Do not update the NSM state. This is for testing only. +.TP +.B -d +Enables debugging. +By default, +.B sm-notify +forks and puts itself in the background after obtaining the +list of hosts from +.BR /var/lib/nfs/sm . 
+.SH FILES +.BR /var/lib/nfs/state +.br +.BR /var/lib/nfs/sm/* +.br +.BR /var/lib/nfs/sm.bak/* +.SH SEE ALSO +.BR rpc.nfsd(8), +.BR portmap(8) +.SH AUTHORS +.br +Olaf Kirch diff --git a/utils/statd/sm-notify.c b/utils/statd/sm-notify.c new file mode 100644 index 0000000..7af0cea --- /dev/null +++ b/utils/statd/sm-notify.c @@ -0,0 +1,678 @@ +/* + * Send NSM notify calls to all hosts listed in /var/lib/sm + * + * Copyright (C) 2004-2006 Olaf Kirch + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef BASEDIR +#define BASEDIR "/var/lib/nfs" +#endif + +#define _SM_STATE_PATH BASEDIR "/state" +#define _SM_DIR_PATH BASEDIR "/sm" +#define _SM_BAK_PATH _SM_DIR_PATH ".bak" + +#define NSM_PROG 100024 +#define NSM_PROGRAM 100024 +#define NSM_VERSION 1 +#define NSM_TIMEOUT 2 +#define NSM_NOTIFY 6 +#define NSM_MAX_TIMEOUT 120 /* don't make this too big */ +#define MAXMSGSIZE 256 + +typedef struct sockaddr_storage nsm_address; + +struct nsm_host { + struct nsm_host * next; + char * name; + char * path; + nsm_address addr; + time_t last_used; + time_t send_next; + unsigned int timeout; + unsigned int retries; + unsigned int xid; +}; + +static char nsm_hostname[256]; +static uint32_t nsm_state; +static int opt_debug = 0; +static int opt_quiet = 0; +static int opt_update_state = 1; +static unsigned int opt_max_retry = 15 * 60; +static char * opt_srcaddr = 0; +static uint16_t opt_srcport = 0; +static int log_syslog = 0; + +static unsigned int nsm_get_state(int); +static void notify(void); +static void notify_host(int, struct nsm_host *); +static void recv_reply(int); +static void backup_hosts(const char *, const char *); +static void get_hosts(const char *); +static void insert_host(struct nsm_host *); +struct nsm_host * find_host(uint32_t); +static int addr_parse(int, const char *, nsm_address *); +static int addr_get_port(nsm_address *); 
+static void addr_set_port(nsm_address *, int); +static int host_lookup(int, const char *, nsm_address *); +void nsm_log(int fac, const char *fmt, ...); + +static struct nsm_host * hosts = NULL; + +int +main(int argc, char **argv) +{ + int c; + + while ((c = getopt(argc, argv, "dm:np:v:q")) != -1) { + switch (c) { + case 'd': + opt_debug++; + break; + case 'm': + opt_max_retry = atoi(optarg) * 60; + break; + case 'n': + opt_update_state = 0; + break; + case 'p': + opt_srcport = atoi(optarg); + break; + case 'v': + opt_srcaddr = optarg; + break; + case 'q': + opt_quiet = 1; + break; + default: + goto usage; + } + } + + if (optind < argc) { +usage: fprintf(stderr, "sm-notify [-d]\n"); + return 1; + } + + if (opt_srcaddr) { + strncpy(nsm_hostname, opt_srcaddr, sizeof(nsm_hostname)-1); + } else + if (gethostname(nsm_hostname, sizeof(nsm_hostname)) < 0) { + perror("gethostname"); + return 1; + } + + backup_hosts(_SM_DIR_PATH, _SM_BAK_PATH); + get_hosts(_SM_BAK_PATH); + + if (!opt_debug) { + if (!opt_quiet) + printf("Backgrounding to notify hosts...\n"); + + openlog("sm-notify", LOG_PID, LOG_DAEMON); + log_syslog = 1; + + if (daemon(0, 0) < 0) { + nsm_log(LOG_WARNING, "unable to background: %s", + strerror(errno)); + return 1; + } + + close(0); + close(1); + close(2); + } + + /* Get and update the NSM state. 
This will call sync() */ + nsm_state = nsm_get_state(opt_update_state); + + notify(); + + if (hosts) { + struct nsm_host *hp; + + while ((hp = hosts) != 0) { + hosts = hp->next; + nsm_log(LOG_NOTICE, + "Unable to notify %s, giving up", + hp->name); + } + return 1; + } + + return 0; +} + +/* + * Notify hosts + */ +void +notify(void) +{ + nsm_address local_addr; + time_t failtime = 0; + int sock = -1; + + sock = socket(AF_INET, SOCK_DGRAM, 0); + if (sock < 0) { + perror("socket"); + exit(1); + } + fcntl(sock, F_SETFL, O_NONBLOCK); + + memset(&local_addr, 0, sizeof(local_addr)); + local_addr.ss_family = AF_INET; /* Default to IPv4 */ + + /* Bind source IP if provided on command line */ + if (opt_srcaddr) { + if (!addr_parse(AF_INET, opt_srcaddr, &local_addr) + && !host_lookup(AF_INET, opt_srcaddr, &local_addr)) { + nsm_log(LOG_WARNING, + "Not a valid hostname or address: \"%s\"\n", + opt_srcaddr); + exit(1); + } + /* We know it's IPv4 at this point */ + } + + /* Use source port if provided on the command line, + * otherwise use bindresvport */ + if (opt_srcport) { + addr_set_port(&local_addr, opt_srcport); + if (bind(sock, (struct sockaddr *) &local_addr, sizeof(local_addr)) < 0) { + perror("bind"); + exit(1); + } + } else { + (void) bindresvport(sock, (struct sockaddr_in *) &local_addr); + } + + if (opt_max_retry) + failtime = time(NULL) + opt_max_retry; + + while (hosts) { + struct pollfd pfd; + time_t now = time(NULL); + unsigned int sent = 0; + struct nsm_host *hp; + long wait; + + if (failtime && now >= failtime) + break; + + while ((wait = hosts->send_next - now) <= 0) { + /* Never send more than 10 packets at once */ + if (sent++ >= 10) + break; + + /* Remove queue head */ + hp = hosts; + hosts = hp->next; + + notify_host(sock, hp); + + /* Set the timeout for this call, using an + exponential timeout strategy */ + wait = hp->timeout; + if ((hp->timeout <<= 1) > NSM_MAX_TIMEOUT) + hp->timeout = NSM_MAX_TIMEOUT; + hp->send_next = now + wait; + hp->retries++; + + 
insert_host(hp); + } + + nsm_log(LOG_DEBUG, "Host %s due in %ld seconds", + hosts->name, wait); + + pfd.fd = sock; + pfd.events = POLLIN; + + wait *= 1000; + if (wait < 100) + wait = 100; + if (poll(&pfd, 1, wait) != 1) + continue; + + recv_reply(sock); + } +} + +/* + * Send notification to a single host + */ +void +notify_host(int sock, struct nsm_host *host) +{ + static unsigned int xid = 0; + nsm_address dest; + uint32_t msgbuf[MAXMSGSIZE], *p; + unsigned int len; + + if (!xid) + xid = getpid() + time(NULL); + if (!host->xid) + host->xid = xid++; + + memset(msgbuf, 0, sizeof(msgbuf)); + p = msgbuf; + *p++ = htonl(host->xid); + *p++ = 0; + *p++ = htonl(2); + + /* If we retransmitted 4 times, reset the port to force + * a new portmap lookup (in case statd was restarted) + */ + if (host->retries >= 4) { + addr_set_port(&host->addr, 0); + host->retries = 0; + } + + dest = host->addr; + if (addr_get_port(&dest) == 0) { + /* Build a PMAP packet */ + nsm_log(LOG_DEBUG, "Sending portmap query to %s", host->name); + + addr_set_port(&dest, 111); + *p++ = htonl(100000); + *p++ = htonl(2); + *p++ = htonl(3); + + /* Auth and verf */ + *p++ = 0; *p++ = 0; + *p++ = 0; *p++ = 0; + + *p++ = htonl(NSM_PROGRAM); + *p++ = htonl(NSM_VERSION); + *p++ = htonl(IPPROTO_UDP); + *p++ = 0; + } else { + /* Build an SM_NOTIFY packet */ + nsm_log(LOG_DEBUG, "Sending SM_NOTIFY to %s", host->name); + + *p++ = htonl(NSM_PROGRAM); + *p++ = htonl(NSM_VERSION); + *p++ = htonl(NSM_NOTIFY); + + /* Auth and verf */ + *p++ = 0; *p++ = 0; + *p++ = 0; *p++ = 0; + + /* state change */ + len = strlen(nsm_hostname); + *p++ = htonl(len); + memcpy(p, nsm_hostname, len); + p += (len + 3) >> 2; + *p++ = htonl(nsm_state); + } + len = (p - msgbuf) << 2; + + sendto(sock, msgbuf, len, 0, (struct sockaddr *) &dest, sizeof(dest)); +} + +/* + * Receive reply from remote host + */ +void +recv_reply(int sock) +{ + struct nsm_host *hp; + uint32_t msgbuf[MAXMSGSIZE], *p, *end; + uint32_t xid; + int res; + + res = 
recv(sock, msgbuf, sizeof(msgbuf), 0); + if (res < 0) + return; + + nsm_log(LOG_DEBUG, "Received packet..."); + + p = msgbuf; + end = p + (res >> 2); + + xid = ntohl(*p++); + if (*p++ != htonl(1) /* must be REPLY */ + || *p++ != htonl(0) /* must be ACCEPTED */ + || *p++ != htonl(0) /* must be NULL verifier */ + || *p++ != htonl(0) + || *p++ != htonl(0)) /* must be SUCCESS */ + return; + + /* Before we look at the data, find the host struct for + this reply */ + if ((hp = find_host(xid)) == NULL) + return; + + if (addr_get_port(&hp->addr) == 0) { + /* This was a portmap request */ + unsigned int port; + + port = ntohl(*p++); + if (p > end) + goto fail; + + hp->send_next = time(NULL); + if (port == 0) { + /* No binding for statd. Delay the next + * portmap query for max timeout */ + nsm_log(LOG_DEBUG, "No statd on %s", hp->name); + hp->timeout = NSM_MAX_TIMEOUT; + hp->send_next += NSM_MAX_TIMEOUT; + } else { + addr_set_port(&hp->addr, port); + if (hp->timeout >= NSM_MAX_TIMEOUT / 4) + hp->timeout = NSM_MAX_TIMEOUT / 4; + } + hp->xid = 0; + } else { + /* Successful NOTIFY call. 
Server returns void, + * so nothing we need to do here (except + * check that we didn't read past the end of the + * packet) + */ + if (p <= end) { + nsm_log(LOG_DEBUG, "Host %s notified successfully", hp->name); + unlink(hp->path); + free(hp->name); + free(hp->path); + free(hp); + return; + } + } + +fail: /* Re-insert the host */ + insert_host(hp); +} + +/* + * Back up all hosts from the sm directory to sm.bak + */ +static void +backup_hosts(const char *dirname, const char *bakname) +{ + struct dirent *de; + DIR *dir; + + if (!(dir = opendir(dirname))) { + perror(dirname); + return; + } + + while ((de = readdir(dir)) != NULL) { + char src[1024], dst[1024]; + + if (de->d_name[0] == '.') + continue; + + snprintf(src, sizeof(src), "%s/%s", dirname, de->d_name); + snprintf(dst, sizeof(dst), "%s/%s", bakname, de->d_name); + if (rename(src, dst) < 0) { + nsm_log(LOG_WARNING, + "Failed to rename %s -> %s: %m", + src, dst); + } + } + closedir(dir); +} + +/* + * Get all entries from sm.bak and convert them to host names + */ +static void +get_hosts(const char *dirname) +{ + struct nsm_host *host; + struct dirent *de; + DIR *dir; + + if (!(dir = opendir(dirname))) { + perror(dirname); + return; + } + + host = NULL; + while ((de = readdir(dir)) != NULL) { + struct stat stb; + char path[1024]; + + if (de->d_name[0] == '.') + continue; + if (host == NULL) + host = calloc(1, sizeof(*host)); + + snprintf(path, sizeof(path), "%s/%s", dirname, de->d_name); + if (!addr_parse(AF_INET, de->d_name, &host->addr) + && !addr_parse(AF_INET6, de->d_name, &host->addr) + && !host_lookup(AF_INET, de->d_name, &host->addr)) { + nsm_log(LOG_WARNING, + "%s doesn't seem to be a valid address, skipped", + de->d_name); + unlink(path); + continue; + } + + if (stat(path, &stb) < 0) + continue; + host->last_used = stb.st_mtime; + host->timeout = NSM_TIMEOUT; + host->path = strdup(path); + host->name = strdup(de->d_name); + + insert_host(host); + host = NULL; + } + closedir(dir); + + if (host) + 
free(host); +} + +/* + * Insert host into sorted list + */ +void +insert_host(struct nsm_host *host) +{ + struct nsm_host **where, *p; + + where = &hosts; + while ((p = *where) != 0) { + /* Sort in ascending order of timeout */ + if (host->send_next < p->send_next) + break; + /* If we have the same timeout, put the + * most recently used host first. + * This makes sure that "recent" hosts + * get notified first. + */ + if (host->send_next == p->send_next + && host->last_used > p->last_used) + break; + where = &p->next; + } + + host->next = *where; + *where = host; +} + +/* + * Find host given the XID + */ +struct nsm_host * +find_host(uint32_t xid) +{ + struct nsm_host **where, *p; + + where = &hosts; + while ((p = *where) != 0) { + if (p->xid == xid) { + *where = p->next; + return p; + } + where = &p->next; + } + return NULL; +} + + +/* + * Retrieve the current NSM state + */ +unsigned int +nsm_get_state(int update) +{ + char newfile[PATH_MAX]; + int fd, state; + + if ((fd = open(_SM_STATE_PATH, O_RDONLY)) < 0) { + if (!opt_quiet) { + nsm_log(LOG_WARNING, "%s: %m", _SM_STATE_PATH); + nsm_log(LOG_WARNING, "Creating %s, set initial state 1", + _SM_STATE_PATH); + } + state = 1; + update = 1; + } else { + if (read(fd, &state, sizeof(state)) != sizeof(state)) { + nsm_log(LOG_WARNING, + "%s: bad file size, setting state = 1", + _SM_STATE_PATH); + state = 1; + update = 1; + } else { + if (!(state & 1)) + state += 1; + } + close(fd); + } + + if (update) { + state += 2; + snprintf(newfile, sizeof(newfile), + "%s.new", _SM_STATE_PATH); + if ((fd = open(newfile, O_CREAT|O_WRONLY, 0644)) < 0) { + nsm_log(LOG_WARNING, "Cannot create %s: %m", newfile); + exit(1); + } + if (write(fd, &state, sizeof(state)) != sizeof(state)) { + nsm_log(LOG_WARNING, + "Failed to write state to %s", newfile); + exit(1); + } + close(fd); + if (rename(newfile, _SM_STATE_PATH) < 0) { + nsm_log(LOG_WARNING, + "Cannot create %s: %m", _SM_STATE_PATH); + exit(1); + } + sync(); + } + + return state; +} + 
+/* + * Address handling utilities + */ +static int +addr_parse(int af, const char *name, nsm_address *addr) +{ + void *ptr; + + if (af == AF_INET) + ptr = &((struct sockaddr_in *) addr)->sin_addr; + else if (af == AF_INET6) + ptr = &((struct sockaddr_in6 *) addr)->sin6_addr; + else + return 0; + if (inet_pton(af, name, ptr) <= 0) + return 0; + ((struct sockaddr *) addr)->sa_family = af; + return 1; +} + +int +addr_get_port(nsm_address *addr) +{ + switch (((struct sockaddr *) addr)->sa_family) { + case AF_INET: + return ntohs(((struct sockaddr_in *) addr)->sin_port); + case AF_INET6: + return ntohs(((struct sockaddr_in6 *) addr)->sin6_port); + } + return 0; +} + +static void +addr_set_port(nsm_address *addr, int port) +{ + switch (((struct sockaddr *) addr)->sa_family) { + case AF_INET: + ((struct sockaddr_in *) addr)->sin_port = htons(port); + break; + case AF_INET6: + ((struct sockaddr_in6 *) addr)->sin6_port = htons(port); + } +} + +static int +host_lookup(int af, const char *name, nsm_address *addr) +{ + struct addrinfo hints, *ai; + int okay = 0; + + memset(&hints, 0, sizeof(hints)); + hints.ai_family = af; + + if (getaddrinfo(name, NULL, &hints, &ai) != 0) + return 0; + + if (ai->ai_addrlen < sizeof(*addr)) { + memcpy(addr, ai->ai_addr, ai->ai_addrlen); + okay = 1; + } + + freeaddrinfo(ai); + return okay; +} + +/* + * Log a message + */ +void +nsm_log(int fac, const char *fmt, ...) +{ + va_list ap; + + if (fac == LOG_DEBUG && !opt_debug) + return; + + va_start(ap, fmt); + if (log_syslog) + vsyslog(fac, fmt, ap); + else { + vfprintf(stderr, fmt, ap); + fputs("\n", stderr); + } + va_end(ap); +} -- 2.39.2