3 * Handle communication with knfsd internal cache
5 * We open /proc/net/rpc/{auth.unix.ip,nfsd.export,nfsd.fh}/channel
6 * and listen for requests (using my_svc_run)
14 #include <sys/types.h>
15 #include <sys/select.h>
19 #include <netinet/in.h>
20 #include <arpa/inet.h>
34 #include "pseudoflavors.h"
37 #include "blkid/blkid.h"
41 * Invoked by RPC service loop
43 void cache_set_fds(fd_set *fdset);
44 int cache_process_req(fd_set *readfds);
58 * Support routines for text-based upcalls.
59 * Fields are separated by spaces.
60 * Fields are either mangled to quote space, tab, newline and slosh with slosh,
61 * or are hexified with a leading \x
62 * Record is terminated with newline.
65 static int cache_export_ent(char *domain, struct exportent *exp, char *p);
67 #define INITIAL_MANAGED_GROUPS 100
71 extern int use_ipaddr;
73 static void auth_unix_ip(FILE *f)
77 * Ignore if class != "nfsd"
78 * Otherwise find domainname and write back:
80 * "nfsd" IP-ADDR expiry domainname
84 char ipaddr[INET6_ADDRSTRLEN];
86 struct addrinfo *tmp = NULL;
87 struct addrinfo *ai = NULL;
88 if (readline(fileno(f), &lbuf, &lbuflen) != 1)
91 xlog(D_CALL, "auth_unix_ip: inbuf '%s'", lbuf);
95 if (qword_get(&cp, class, 20) <= 0 ||
96 strcmp(class, "nfsd") != 0)
99 if (qword_get(&cp, ipaddr, sizeof(ipaddr)) <= 0)
102 tmp = host_pton(ipaddr);
108 /* addr is a valid, interesting address, find the domain name... */
110 ai = client_resolve(tmp->ai_addr);
111 client = client_compose(ai);
116 qword_print(f, "nfsd");
117 qword_print(f, ipaddr);
118 qword_printuint(f, time(0) + DEFAULT_TTL);
120 qword_print(f, ipaddr);
122 qword_print(f, *client?client:"DEFAULT");
124 xlog(D_CALL, "auth_unix_ip: client %p '%s'", client, client?client: "DEFAULT");
129 static void auth_unix_gid(FILE *f)
134 * uid expiry count list of group ids
138 static gid_t *groups = NULL;
139 static int groups_len = 0;
145 if (groups_len == 0) {
146 groups = malloc(sizeof(gid_t) * INITIAL_MANAGED_GROUPS);
150 groups_len = INITIAL_MANAGED_GROUPS;
153 ngroups = groups_len;
155 if (readline(fileno(f), &lbuf, &lbuflen) != 1)
159 if (qword_get_uint(&cp, &uid) != 0)
166 rv = getgrouplist(pw->pw_name, pw->pw_gid, groups, &ngroups);
167 if (rv == -1 && ngroups >= groups_len) {
168 more_groups = realloc(groups, sizeof(gid_t)*ngroups);
172 groups = more_groups;
173 groups_len = ngroups;
174 rv = getgrouplist(pw->pw_name, pw->pw_gid,
179 qword_printuint(f, uid);
180 qword_printuint(f, time(0) + DEFAULT_TTL);
182 qword_printuint(f, ngroups);
183 for (i=0; i<ngroups; i++)
184 qword_printuint(f, groups[i]);
186 qword_printuint(f, 0);
191 static const char *get_uuid_blkdev(char *path)
193 /* We set *safe if we know that we need the
194 * fsid from statfs too.
196 static blkid_cache cache = NULL;
199 blkid_tag_iterate iter;
202 const char *val, *uuid = NULL;
205 blkid_get_cache(&cache, NULL);
207 if (stat(path, &stb) != 0)
209 devname = blkid_devno_to_devname(stb.st_dev);
212 dev = blkid_get_dev(cache, devname, BLKID_DEV_NORMAL);
216 iter = blkid_tag_iterate_begin(dev);
219 while (blkid_tag_next(iter, &type, &val) == 0) {
220 if (strcmp(type, "UUID") == 0)
222 if (strcmp(type, "TYPE") == 0 &&
223 strcmp(val, "btrfs") == 0) {
228 blkid_tag_iterate_end(iter);
232 #define get_uuid_blkdev(path) (NULL)
235 static int get_uuid(const char *val, int uuidlen, char *u)
237 /* extract hex digits from 'val' and compose a uuid
238 * of the given length (max 16), xoring bytes to make
243 memset(u, 0, uuidlen);
244 for ( ; *val ; val++) {
265 static int uuid_by_path(char *path, int type, int uuidlen, char *uuid)
267 /* get a uuid for the filesystem found at 'path'.
268 * There are several possible ways of generating the
270 * Type 0 is used for new filehandles, while other types
271 * may be used to interpret old filehandles - to ensure smooth
273 * We return 1 if a uuid was found (and it might be worth
274 * trying the next type) or 0 if no more uuid types can be
278 /* Possible sources of uuid are
282 * On some filesystems (e.g. vfat) the statfs64 uuid is simply an
283 * encoding of the device that the filesystem is mounted from, so
284 * it would be very bad to use that (as device numbers change). blkid
286 * On other filesystems (e.g. btrfs) the statfs64 uuid contains
287 * important info that the blkid uuid cannot contain: This happens
288 * when multiple subvolumes are exported (they have the same
289 * blkid uuid but different statfs64 uuids).
290 * We rely on get_uuid_blkdev *knowing* which is which and not returning
291 * a uuid for filesystems where the statfs64 uuid is better.
296 const char *blkid_val;
299 blkid_val = get_uuid_blkdev(path);
301 if (statfs64(path, &st) == 0 &&
302 (st.f_fsid.__val[0] || st.f_fsid.__val[1]))
303 snprintf(fsid_val, 17, "%08x%08x",
304 st.f_fsid.__val[0], st.f_fsid.__val[1]);
308 if (blkid_val && (type--) == 0)
310 else if (fsid_val[0] && (type--) == 0)
315 get_uuid(val, uuidlen, uuid);
319 /* Iterate through /etc/mtab, finding mountpoints
320 * at or below a given path
322 static char *next_mnt(void **v, char *p)
328 f = setmntent("/etc/mtab", "r");
332 while ((me = getmntent(f)) != NULL &&
333 (strncmp(me->mnt_dir, p, l) != 0 ||
334 me->mnt_dir[l] != '/'))
344 /* True iff e1 is a child of e2 and e2 has crossmnt set: */
345 static bool subexport(struct exportent *e1, struct exportent *e2)
347 char *p1 = e1->e_path, *p2 = e2->e_path;
350 return e2->e_flags & NFSEXP_CROSSMOUNT
351 && strncmp(p1, p2, l2) == 0
357 /* We could use a union for this, but it would be more
358 * complicated; why bother? */
362 unsigned int fsidnum;
367 int parse_fsid(int fsidtype, int fsidlen, char *fsid, struct parsed_fsid *parsed)
370 unsigned long long inode64;
372 parsed->fsidtype = fsidtype;
374 case FSID_DEV: /* 4 bytes: 2 major, 2 minor, 4 inode */
377 memcpy(&dev, fsid, 4);
378 memcpy(&parsed->inode, fsid+4, 4);
379 parsed->major = ntohl(dev)>>16;
380 parsed->minor = ntohl(dev) & 0xFFFF;
383 case FSID_NUM: /* 4 bytes - fsid */
386 memcpy(&parsed->fsidnum, fsid, 4);
389 case FSID_MAJOR_MINOR: /* 12 bytes: 4 major, 4 minor, 4 inode
390 * This format is never actually used but was
391 * an historical accident
395 memcpy(&dev, fsid, 4);
396 parsed->major = ntohl(dev);
397 memcpy(&dev, fsid+4, 4);
398 parsed->minor = ntohl(dev);
399 memcpy(&parsed->inode, fsid+8, 4);
402 case FSID_ENCODE_DEV: /* 8 bytes: 4 byte packed device number, 4 inode */
403 /* This is *host* endian, not net-byte-order, because
404 * no-one outside this host has any business interpreting it
408 memcpy(&dev, fsid, 4);
409 memcpy(&parsed->inode, fsid+4, 4);
410 parsed->major = (dev & 0xfff00) >> 8;
411 parsed->minor = (dev & 0xff) | ((dev >> 12) & 0xfff00);
414 case FSID_UUID4_INUM: /* 4 byte inode number and 4 byte uuid */
417 memcpy(&parsed->inode, fsid, 4);
419 parsed->fhuuid = fsid+4;
421 case FSID_UUID8: /* 8 byte uuid */
425 parsed->fhuuid = fsid;
427 case FSID_UUID16: /* 16 byte uuid */
430 parsed->uuidlen = 16;
431 parsed->fhuuid = fsid;
433 case FSID_UUID16_INUM: /* 8 byte inode number and 16 byte uuid */
436 memcpy(&inode64, fsid, 8);
437 parsed->inode = inode64;
438 parsed->uuidlen = 16;
439 parsed->fhuuid = fsid+8;
445 static bool match_fsid(struct parsed_fsid *parsed, nfs_export *exp, char *path)
451 if (stat(path, &stb) != 0)
453 if (!S_ISDIR(stb.st_mode) && !S_ISREG(stb.st_mode))
456 switch (parsed->fsidtype) {
458 case FSID_MAJOR_MINOR:
459 case FSID_ENCODE_DEV:
460 if (stb.st_ino != parsed->inode)
462 if (parsed->major != major(stb.st_dev) ||
463 parsed->minor != minor(stb.st_dev))
467 if (((exp->m_export.e_flags & NFSEXP_FSID) == 0 ||
468 exp->m_export.e_fsid != parsed->fsidnum))
471 case FSID_UUID4_INUM:
472 case FSID_UUID16_INUM:
473 if (stb.st_ino != parsed->inode)
478 if (!is_mountpoint(path))
481 if (exp->m_export.e_uuid)
482 get_uuid(exp->m_export.e_uuid, parsed->uuidlen, u);
485 uuid_by_path(path, type, parsed->uuidlen, u);
487 if (memcmp(u, parsed->fhuuid, parsed->uuidlen) == 0)
490 if (memcmp(u, parsed->fhuuid, parsed->uuidlen) != 0)
494 /* Well, unreachable, actually: */
498 static void nfsd_fh(FILE *f)
501 * domain fsidtype fsid
502 * interpret fsid, find export point and options, and write:
503 * domain fsidtype fsid expiry path
510 struct parsed_fsid parsed;
511 struct exportent *found = NULL;
512 struct addrinfo *ai = NULL;
513 char *found_path = NULL;
518 if (readline(fileno(f), &lbuf, &lbuflen) != 1)
521 xlog(D_CALL, "nfsd_fh: inbuf '%s'", lbuf);
525 dom = malloc(strlen(cp));
528 if (qword_get(&cp, dom, strlen(cp)) <= 0)
530 if (qword_get_int(&cp, &fsidtype) != 0)
532 if (fsidtype < 0 || fsidtype > 7)
533 goto out; /* unknown type */
534 if ((fsidlen = qword_get(&cp, fsid, 32)) <= 0)
536 if (parse_fsid(fsidtype, fsidlen, fsid, &parsed))
541 /* Now determine export point for this fsid/domain */
542 for (i=0 ; i < MCL_MAXTYPES; i++) {
543 nfs_export *next_exp;
544 for (exp = exportlist[i].p_head; exp; exp = next_exp) {
547 if (exp->m_export.e_flags & NFSEXP_CROSSMOUNT) {
548 static nfs_export *prev = NULL;
549 static void *mnt = NULL;
553 path = next_mnt(&mnt, exp->m_export.e_path);
555 next_exp = exp->m_next;
563 path = exp->m_export.e_path;
567 path = exp->m_export.e_path;
568 next_exp = exp->m_next;
571 if (!use_ipaddr && !client_member(dom, exp->m_client->m_hostname))
573 if (exp->m_export.e_mountpoint &&
574 !is_mountpoint(exp->m_export.e_mountpoint[0]?
575 exp->m_export.e_mountpoint:
576 exp->m_export.e_path))
579 if (!match_fsid(&parsed, exp, path))
583 struct addrinfo *tmp;
584 tmp = host_pton(dom);
587 ai = client_resolve(tmp->ai_addr);
590 if (!client_check(exp->m_client, ai))
593 if (!found || subexport(&exp->m_export, found)) {
594 found = &exp->m_export;
596 found_path = strdup(path);
597 if (found_path == NULL)
599 } else if (strcmp(found->e_path, exp->m_export.e_path)
600 && !subexport(found, &exp->m_export))
602 xlog(L_WARNING, "%s and %s have same filehandle for %s, using first",
603 found_path, path, dom);
608 found->e_mountpoint &&
609 !is_mountpoint(found->e_mountpoint[0]?
612 /* Cannot export this yet
613 * should log a warning, but need to rate limit
614 xlog(L_WARNING, "%s not exported as %d not a mountpoint",
615 found->e_path, found->e_mountpoint);
617 /* FIXME we need to make sure we re-visit this later */
620 if (!found && dev_missing) {
621 /* The missing dev could be what we want, so just be
622 * quiet rather than returning stale yet
628 if (cache_export_ent(dom, found, found_path) < 0)
632 qword_printint(f, fsidtype);
633 qword_printhex(f, fsid, fsidlen);
634 /* The fsid -> path lookup can be quite expensive as it
635 * potentially stats and reads lots of devices, and some of those
636 * might have spun down. The answer is not likely to
637 * change underneath us, and an 'exportfs -f' can always
638 * remove this from the kernel, so use a really long
639 * timeout. Maybe this should be configurable on the command
642 qword_printint(f, 0x7fffffff);
644 qword_print(f, found_path);
651 xlog(D_CALL, "nfsd_fh: found %p path %s", found, found ? found->e_path : NULL);
655 static void write_fsloc(FILE *f, struct exportent *ep)
657 struct servers *servers;
659 if (ep->e_fslocmethod == FSLOC_NONE)
662 servers = replicas_lookup(ep->e_fslocmethod, ep->e_fslocdata);
665 qword_print(f, "fsloc");
666 qword_printint(f, servers->h_num);
667 if (servers->h_num >= 0) {
669 for (i=0; i<servers->h_num; i++) {
670 qword_print(f, servers->h_mp[i]->h_host);
671 qword_print(f, servers->h_mp[i]->h_path);
674 qword_printint(f, servers->h_referral);
675 release_replicas(servers);
678 static void write_secinfo(FILE *f, struct exportent *ep, int flag_mask)
682 for (p = ep->e_secinfo; p->flav; p++)
684 if (p == ep->e_secinfo) {
685 /* There was no sec= option */
688 qword_print(f, "secinfo");
689 qword_printint(f, p - ep->e_secinfo);
690 for (p = ep->e_secinfo; p->flav; p++) {
691 qword_printint(f, p->flav->fnum);
692 qword_printint(f, p->flags & flag_mask);
697 static int dump_to_cache(FILE *f, char *domain, char *path, struct exportent *exp)
699 qword_print(f, domain);
700 qword_print(f, path);
702 int different_fs = strcmp(path, exp->e_path) != 0;
703 int flag_mask = different_fs ? ~NFSEXP_FSID : ~0;
705 qword_printuint(f, time(0) + exp->e_ttl);
706 qword_printint(f, exp->e_flags & flag_mask);
707 qword_printint(f, exp->e_anonuid);
708 qword_printint(f, exp->e_anongid);
709 qword_printint(f, exp->e_fsid);
711 write_secinfo(f, exp, flag_mask);
712 if (exp->e_uuid == NULL || different_fs) {
714 if (uuid_by_path(path, 0, 16, u)) {
715 qword_print(f, "uuid");
716 qword_printhex(f, u, 16);
720 get_uuid(exp->e_uuid, 16, u);
721 qword_print(f, "uuid");
722 qword_printhex(f, u, 16);
725 qword_printuint(f, time(0) + DEFAULT_TTL);
729 static int is_subdirectory(char *child, char *parent)
731 int l = strlen(parent);
733 return strcmp(child, parent) == 0
734 || (strncmp(child, parent, l) == 0 && child[l] == '/');
737 static int path_matches(nfs_export *exp, char *path)
739 if (exp->m_export.e_flags & NFSEXP_CROSSMOUNT)
740 return is_subdirectory(path, exp->m_export.e_path);
741 return strcmp(path, exp->m_export.e_path) == 0;
745 client_matches(nfs_export *exp, char *dom, struct addrinfo *ai)
748 return client_check(exp->m_client, ai);
749 return client_member(dom, exp->m_client->m_hostname);
753 export_matches(nfs_export *exp, char *dom, char *path, struct addrinfo *ai)
755 return path_matches(exp, path) && client_matches(exp, dom, ai);
759 lookup_export(char *dom, char *path, struct addrinfo *ai)
762 nfs_export *found = NULL;
766 for (i=0 ; i < MCL_MAXTYPES; i++) {
767 for (exp = exportlist[i].p_head; exp; exp = exp->m_next) {
768 if (!export_matches(exp, dom, path, ai))
776 /* Always prefer non-V4ROOT mounts */
777 if (found->m_export.e_flags & NFSEXP_V4ROOT)
780 /* If one is a CROSSMOUNT, then prefer the longest path */
781 if (((found->m_export.e_flags & NFSEXP_CROSSMOUNT) ||
782 (exp->m_export.e_flags & NFSEXP_CROSSMOUNT)) &&
783 strlen(found->m_export.e_path) !=
784 strlen(exp->m_export.e_path)) {
786 if (strlen(exp->m_export.e_path) >
787 strlen(found->m_export.e_path)) {
793 } else if (found_type == i && found->m_warned == 0) {
794 xlog(L_WARNING, "%s exported to both %s and %s, "
795 "arbitrarily choosing options from first",
796 path, found->m_client->m_hostname, exp->m_client->m_hostname,
805 static void nfsd_export(FILE *f)
809 * determine export options and return:
810 * domain path expiry flags anonuid anongid fsid
815 nfs_export *found = NULL;
816 struct addrinfo *ai = NULL;
818 if (readline(fileno(f), &lbuf, &lbuflen) != 1)
821 xlog(D_CALL, "nfsd_export: inbuf '%s'", lbuf);
824 dom = malloc(strlen(cp));
825 path = malloc(strlen(cp));
830 if (qword_get(&cp, dom, strlen(lbuf)) <= 0)
832 if (qword_get(&cp, path, strlen(lbuf)) <= 0)
838 struct addrinfo *tmp;
839 tmp = host_pton(dom);
842 ai = client_resolve(tmp->ai_addr);
847 found = lookup_export(dom, path, ai);
850 if (dump_to_cache(f, dom, path, &found->m_export) < 0) {
852 "Cannot export %s, possibly unsupported filesystem"
853 " or fsid= required", path);
854 dump_to_cache(f, dom, path, NULL);
857 dump_to_cache(f, dom, path, NULL);
860 xlog(D_CALL, "nfsd_export: found %p path %s", found, path ? path : NULL);
862 if (path) free(path);
869 void (*cache_handle)(FILE *f);
871 char vbuf[RPC_CHAN_BUF_SIZE];
873 { "auth.unix.ip", auth_unix_ip, NULL, ""},
874 { "auth.unix.gid", auth_unix_gid, NULL, ""},
875 { "nfsd.export", nfsd_export, NULL, ""},
876 { "nfsd.fh", nfsd_fh, NULL, ""},
877 { NULL, NULL, NULL, ""}
880 extern int manage_gids;
883 * cache_open - prepare communications channels with kernel RPC caches
886 void cache_open(void)
889 for (i=0; cachelist[i].cache_name; i++ ) {
891 if (!manage_gids && cachelist[i].cache_handle == auth_unix_gid)
893 sprintf(path, "/proc/net/rpc/%s/channel", cachelist[i].cache_name);
894 cachelist[i].f = fopen(path, "r+");
895 if (cachelist[i].f != NULL) {
896 setvbuf(cachelist[i].f, cachelist[i].vbuf, _IOLBF,
903 * cache_set_fds - prepare cache file descriptors for one iteration of the service loop
904 * @fdset: pointer to fd_set to prepare
906 void cache_set_fds(fd_set *fdset)
909 for (i=0; cachelist[i].cache_name; i++) {
911 FD_SET(fileno(cachelist[i].f), fdset);
916 * cache_process_req - process any active cache file descriptors during service loop iteration
917 * @readfds: pointer to fd_set to examine for activity
919 int cache_process_req(fd_set *readfds)
923 for (i=0; cachelist[i].cache_name; i++) {
924 if (cachelist[i].f != NULL &&
925 FD_ISSET(fileno(cachelist[i].f), readfds)) {
927 cachelist[i].cache_handle(cachelist[i].f);
928 FD_CLR(fileno(cachelist[i].f), readfds);
936 * Give IP->domain and domain+path->options to kernel
937 * % echo nfsd $IP $[now+DEFAULT_TTL] $domain > /proc/net/rpc/auth.unix.ip/channel
938 * % echo $domain $path $[now+DEFAULT_TTL] $options $anonuid $anongid $fsid > /proc/net/rpc/nfsd.export/channel
941 static int cache_export_ent(char *domain, struct exportent *exp, char *path)
944 FILE *f = fopen("/proc/net/rpc/nfsd.export/channel", "w");
948 err = dump_to_cache(f, domain, exp->e_path, exp);
951 "Cannot export %s, possibly unsupported filesystem or"
952 " fsid= required", exp->e_path);
955 while (err == 0 && (exp->e_flags & NFSEXP_CROSSMOUNT) && path) {
956 /* really an 'if', but we can break out of
957 * a 'while' more easily */
958 /* Look along 'path' for other filesystems
959 * and export them with the same options
962 size_t l = strlen(exp->e_path);
965 if (strlen(path) <= l || path[l] != '/' ||
966 strncmp(exp->e_path, path, l) != 0)
968 if (stat(exp->e_path, &stb) != 0)
971 while(path[l] == '/') {
973 /* errors for submount should fail whole filesystem */
977 while (path[l] != '/' && path[l])
981 err2 = lstat(path, &stb);
985 if (stb.st_dev == dev)
989 dump_to_cache(f, domain, path, exp);
1000 * cache_export - Inform kernel of a new nfs_export
1001 * @exp: target nfs_export
1002 * @path: NUL-terminated C string containing export path
1004 int cache_export(nfs_export *exp, char *path)
1006 char buf[INET6_ADDRSTRLEN];
1010 f = fopen("/proc/net/rpc/auth.unix.ip/channel", "w");
1015 qword_print(f, "nfsd");
1017 host_ntop(get_addrlist(exp->m_client, 0), buf, sizeof(buf)));
1018 qword_printuint(f, time(0) + exp->m_export.e_ttl);
1019 qword_print(f, exp->m_client->m_hostname);
1024 err = cache_export_ent(exp->m_client->m_hostname, &exp->m_export, path)
1030 * cache_get_filehandle - given an nfs_export, get its root filehandle
1031 * @exp: target nfs_export
1032 * @len: length of requested file handle
1033 * @p: NUL-terminated C string containing export path
1035 * Returns pointer to NFS file handle of root directory of export
1038 * echo $domain $path $length
1039 * read filehandle <&0
1040 * } <> /proc/fs/nfsd/filehandle
1043 cache_get_filehandle(nfs_export *exp, int len, char *p)
1045 FILE *f = fopen("/proc/fs/nfsd/filehandle", "r+");
1049 static struct nfs_fh_len fh;
1052 f = fopen("/proc/fs/nfs/filehandle", "r+");
1056 qword_print(f, exp->m_client->m_hostname);
1058 qword_printint(f, len);
1059 failed = qword_eol(f);
1062 failed = (fgets(buf, sizeof(buf), f) == NULL);
1066 memset(fh.fh_handle, 0, sizeof(fh.fh_handle));
1067 fh.fh_size = qword_get(&bp, (char *)fh.fh_handle, NFS3_FHSIZE);