e1027f3d35d2dbec56883f34c43af04088016cd1
[nfs-utils.git] / utils / mountd / cache.c
1
2 /*
3  * Handle communication with knfsd internal cache
4  *
5  * We open /proc/net/rpc/{auth.unix.ip,nfsd.export,nfsd.fh}/channel
6  * and listen for requests (using my_svc_run)
7  * 
8  */
9
10 #ifdef HAVE_CONFIG_H
11 #include <config.h>
12 #endif
13
14 #include <sys/types.h>
15 #include <sys/select.h>
16 #include <sys/stat.h>
17 #include <sys/vfs.h>
18 #include <time.h>
19 #include <netinet/in.h>
20 #include <arpa/inet.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <errno.h>
24 #include <ctype.h>
25 #include <pwd.h>
26 #include <grp.h>
27 #include <mntent.h>
28 #include "misc.h"
29 #include "nfslib.h"
30 #include "exportfs.h"
31 #include "mountd.h"
32 #include "fsloc.h"
33 #include "pseudoflavors.h"
34
35 #ifdef USE_BLKID
36 #include "blkid/blkid.h"
37 #endif
38
39 /*
40  * Invoked by RPC service loop
41  */
42 void    cache_set_fds(fd_set *fdset);
43 int     cache_process_req(fd_set *readfds);
44
/*
 * Filehandle fsid encodings.  The numeric value is the "fsidtype"
 * field of an nfsd.fh request; parse_fsid() documents the byte
 * layout that goes with each one.
 */
enum nfsd_fsid {
        FSID_DEV         = 0,
        FSID_NUM         = 1,
        FSID_MAJOR_MINOR = 2,
        FSID_ENCODE_DEV  = 3,
        FSID_UUID4_INUM  = 4,
        FSID_UUID8       = 5,
        FSID_UUID16      = 6,
        FSID_UUID16_INUM = 7,
};
55
56 /*
57  * Support routines for text-based upcalls.
58  * Fields are separated by spaces.
 * Fields are either mangled, quoting space, tab, newline and slosh
 * (backslash) with a slosh, or hexified with a leading \x.
61  * Record is terminated with newline.
62  *
63  */
64 static int cache_export_ent(char *domain, struct exportent *exp, char *p);
65
66 #define INITIAL_MANAGED_GROUPS 100
67
/* Line buffer (and its length) shared by the request handlers in this
 * file: each handler hands both to readline() to receive one request.
 */
char *lbuf  = NULL;
int lbuflen = 0;
/* Defined elsewhere.  When non-zero, auth_unix_ip() skips the client
 * lookup and answers with "$" followed by the IP address.
 */
extern int use_ipaddr;
71
72 static void auth_unix_ip(FILE *f)
73 {
74         /* requests are
75          *  class IP-ADDR
76          * Ignore if class != "nfsd"
77          * Otherwise find domainname and write back:
78          *
79          *  "nfsd" IP-ADDR expiry domainname
80          */
81         char *cp;
82         char class[20];
83         char ipaddr[INET6_ADDRSTRLEN + 1];
84         char *client = NULL;
85         struct addrinfo *tmp = NULL;
86         if (readline(fileno(f), &lbuf, &lbuflen) != 1)
87                 return;
88
89         xlog(D_CALL, "auth_unix_ip: inbuf '%s'", lbuf);
90
91         cp = lbuf;
92
93         if (qword_get(&cp, class, 20) <= 0 ||
94             strcmp(class, "nfsd") != 0)
95                 return;
96
97         if (qword_get(&cp, ipaddr, sizeof(ipaddr) - 1) <= 0)
98                 return;
99
100         tmp = host_pton(ipaddr);
101         if (tmp == NULL)
102                 return;
103
104         auth_reload();
105
106         /* addr is a valid, interesting address, find the domain name... */
107         if (!use_ipaddr) {
108                 struct addrinfo *ai = NULL;
109
110                 ai = client_resolve(tmp->ai_addr);
111                 if (ai) {
112                         client = client_compose(ai);
113                         freeaddrinfo(ai);
114                 }
115         }
116         qword_print(f, "nfsd");
117         qword_print(f, ipaddr);
118         qword_printtimefrom(f, DEFAULT_TTL);
119         if (use_ipaddr) {
120                 memmove(ipaddr + 1, ipaddr, strlen(ipaddr) + 1);
121                 ipaddr[0] = '$';
122                 qword_print(f, ipaddr);
123         } else if (client)
124                 qword_print(f, *client?client:"DEFAULT");
125         qword_eol(f);
126         xlog(D_CALL, "auth_unix_ip: client %p '%s'", client, client?client: "DEFAULT");
127
128         free(client);
129         freeaddrinfo(tmp);
130
131 }
132
133 static void auth_unix_gid(FILE *f)
134 {
135         /* Request are
136          *  uid
137          * reply is
138          *  uid expiry count list of group ids
139          */
140         uid_t uid;
141         struct passwd *pw;
142         static gid_t *groups = NULL;
143         static int groups_len = 0;
144         gid_t *more_groups;
145         int ngroups;
146         int rv, i;
147         char *cp;
148
149         if (groups_len == 0) {
150                 groups = malloc(sizeof(gid_t) * INITIAL_MANAGED_GROUPS);
151                 if (!groups)
152                         return;
153
154                 groups_len = INITIAL_MANAGED_GROUPS;
155         }
156
157         ngroups = groups_len;
158
159         if (readline(fileno(f), &lbuf, &lbuflen) != 1)
160                 return;
161
162         cp = lbuf;
163         if (qword_get_uint(&cp, &uid) != 0)
164                 return;
165
166         pw = getpwuid(uid);
167         if (!pw)
168                 rv = -1;
169         else {
170                 rv = getgrouplist(pw->pw_name, pw->pw_gid, groups, &ngroups);
171                 if (rv == -1 && ngroups >= groups_len) {
172                         more_groups = realloc(groups, sizeof(gid_t)*ngroups);
173                         if (!more_groups)
174                                 rv = -1;
175                         else {
176                                 groups = more_groups;
177                                 groups_len = ngroups;
178                                 rv = getgrouplist(pw->pw_name, pw->pw_gid,
179                                                   groups, &ngroups);
180                         }
181                 }
182         }
183         qword_printuint(f, uid);
184         qword_printtimefrom(f, DEFAULT_TTL);
185         if (rv >= 0) {
186                 qword_printuint(f, ngroups);
187                 for (i=0; i<ngroups; i++)
188                         qword_printuint(f, groups[i]);
189         } else
190                 qword_printuint(f, 0);
191         qword_eol(f);
192 }
193
#if USE_BLKID
/*
 * Look up the blkid "UUID" tag of the device holding 'path'.
 *
 * Returns the tag value, or NULL when the path cannot be stat()ed,
 * the device is not known to blkid, it carries no UUID tag, or its
 * TYPE tag is "btrfs" (for which the blkid uuid is deliberately not
 * reported).  The blkid cache is opened on first use and kept for the
 * life of the process.
 */
static const char *get_uuid_blkdev(char *path)
{
        static blkid_cache cache = NULL;
        struct stat stb;
        char *devname;
        blkid_dev dev;
        blkid_tag_iterate iter;
        const char *tag;
        const char *value;
        const char *uuid = NULL;

        if (cache == NULL)
                blkid_get_cache(&cache, NULL);

        if (stat(path, &stb) != 0)
                return NULL;

        devname = blkid_devno_to_devname(stb.st_dev);
        if (devname == NULL)
                return NULL;
        dev = blkid_get_dev(cache, devname, BLKID_DEV_NORMAL);
        free(devname);
        if (dev == NULL)
                return NULL;

        iter = blkid_tag_iterate_begin(dev);
        if (iter == NULL)
                return NULL;
        while (blkid_tag_next(iter, &tag, &value) == 0) {
                if (strcmp(tag, "UUID") == 0) {
                        uuid = value;
                } else if (strcmp(tag, "TYPE") == 0 &&
                           strcmp(value, "btrfs") == 0) {
                        /* discard any UUID already seen and stop */
                        uuid = NULL;
                        break;
                }
        }
        blkid_tag_iterate_end(iter);
        return uuid;
}
#else
#define get_uuid_blkdev(path) (NULL)
#endif
238
/*
 * Build a binary uuid of 'uuidlen' bytes from the hex digits in 'val'.
 *
 * Every character of 'val' that is not a hex digit (dashes, spaces,
 * ...) is skipped.  Digits are packed two per byte, high nibble first.
 * When 'val' holds more than uuidlen*2 digits the write position wraps
 * to the start and further digits are XORed in, folding a long uuid
 * into a shorter one.  Fewer digits leave the remaining bytes zero.
 *
 * 'u' must have room for 'uuidlen' bytes; nothing is written when
 * 'uuidlen' is 0.  Always returns 1.
 */
static int get_uuid(const char *val, size_t uuidlen, char *u)
{
        size_t nibble = 0;

        /* With no output bytes there is nowhere to fold digits into. */
        if (uuidlen == 0)
                return 1;

        memset(u, 0, uuidlen);
        for ( ; *val != '\0'; val++) {
                /* <ctype.h> functions need an unsigned char value; a
                 * plain (possibly negative) char is undefined there. */
                unsigned char ch = (unsigned char)*val;
                int digit;

                if (!isxdigit(ch))
                        continue;
                if (isdigit(ch))
                        digit = ch - '0';
                else if (isupper(ch))
                        digit = ch - 'A' + 10;
                else
                        digit = ch - 'a' + 10;

                /* even positions fill the high nibble of the byte */
                if ((nibble & 1) == 0)
                        digit <<= 4;
                u[nibble / 2] ^= (char)digit;

                nibble++;
                if (nibble == uuidlen * 2)
                        nibble = 0;
        }
        return 1;
}
268
269 static int uuid_by_path(char *path, int type, size_t uuidlen, char *uuid)
270 {
271         /* get a uuid for the filesystem found at 'path'.
272          * There are several possible ways of generating the
273          * uuids (types).
274          * Type 0 is used for new filehandles, while other types
275          * may be used to interpret old filehandle - to ensure smooth
276          * forward migration.
277          * We return 1 if a uuid was found (and it might be worth 
278          * trying the next type) or 0 if no more uuid types can be
279          * extracted.
280          */
281
282         /* Possible sources of uuid are
283          * - blkid uuid
284          * - statfs64 uuid
285          *
286          * On some filesystems (e.g. vfat) the statfs64 uuid is simply an
287          * encoding of the device that the filesystem is mounted from, so
288          * it we be very bad to use that (as device numbers change).  blkid
289          * must be preferred.
290          * On other filesystems (e.g. btrfs) the statfs64 uuid contains
291          * important info that the blkid uuid cannot contain:  This happens
292          * when multiple subvolumes are exported (they have the same
293          * blkid uuid but different statfs64 uuids).
294          * We rely on get_uuid_blkdev *knowing* which is which and not returning
295          * a uuid for filesystems where the statfs64 uuid is better.
296          *
297          */
298         struct statfs64 st;
299         char fsid_val[17];
300         const char *blkid_val;
301         const char *val;
302
303         blkid_val = get_uuid_blkdev(path);
304
305         if (statfs64(path, &st) == 0 &&
306             (st.f_fsid.__val[0] || st.f_fsid.__val[1]))
307                 snprintf(fsid_val, 17, "%08x%08x",
308                          st.f_fsid.__val[0], st.f_fsid.__val[1]);
309         else
310                 fsid_val[0] = 0;
311
312         if (blkid_val && (type--) == 0)
313                 val = blkid_val;
314         else if (fsid_val[0] && (type--) == 0)
315                 val = fsid_val;
316         else
317                 return 0;
318
319         get_uuid(val, uuidlen, uuid);
320         return 1;
321 }
322
/*
 * Iterate through /etc/mtab, returning each mount point that lies
 * strictly below the path 'p' (the entry must be 'p' followed by '/').
 *
 * '*v' is the iteration cursor: pass a pointer to NULL to start, and
 * keep passing the same pointer to continue.  Returns the next
 * matching mount directory, or NULL when the table is exhausted (the
 * cursor is then closed and reset to NULL) or when /etc/mtab cannot be
 * opened.  The returned string is the mnt_dir of the entry handed back
 * by getmntent(), so it is only good until the next call.
 */
static char *next_mnt(void **v, char *p)
{
        FILE *f;
        struct mntent *me;
        size_t l = strlen(p);

        if (*v == NULL) {
                f = setmntent("/etc/mtab", "r");
                /* No mount table: nothing to iterate, and getmntent()
                 * must not be handed a NULL stream. */
                if (f == NULL)
                        return NULL;
                *v = f;
        } else
                f = *v;

        while ((me = getmntent(f)) != NULL &&
               (strncmp(me->mnt_dir, p, l) != 0 ||
                me->mnt_dir[l] != '/'))
                ;
        if (me == NULL) {
                endmntent(f);
                *v = NULL;
                return NULL;
        }
        return me->mnt_dir;
}
347
/*
 * Return 1 if 'child' is 'parent' itself or a path beneath it
 * (textually: 'parent' followed by '/'), 0 otherwise.
 * A 'parent' of "/" contains everything.
 */
static int is_subdirectory(char *child, char *parent)
{
        size_t plen;

        if (strcmp(parent, "/") == 0)
                return 1;
        if (strcmp(child, parent) == 0)
                return 1;

        plen = strlen(parent);
        if (strncmp(child, parent, plen) != 0)
                return 0;
        /* the prefix must end on a component boundary */
        return child[plen] == '/';
}
358
359 static int path_matches(nfs_export *exp, char *path)
360 {
361         if (exp->m_export.e_flags & NFSEXP_CROSSMOUNT)
362                 return is_subdirectory(path, exp->m_export.e_path);
363         return strcmp(path, exp->m_export.e_path) == 0;
364 }
365
366 static int
367 export_matches(nfs_export *exp, char *dom, char *path, struct addrinfo *ai)
368 {
369         return path_matches(exp, path) && client_matches(exp, dom, ai);
370 }
371
372 /* True iff e1 is a child of e2 and e2 has crossmnt set: */
373 static bool subexport(struct exportent *e1, struct exportent *e2)
374 {
375         char *p1 = e1->e_path, *p2 = e2->e_path;
376         size_t l2 = strlen(p2);
377
378         return e2->e_flags & NFSEXP_CROSSMOUNT
379                 && strncmp(p1, p2, l2) == 0
380                 && p1[l2] == '/';
381 }
382
/*
 * An fsid as decoded by parse_fsid().  Only the members that belong to
 * the given fsidtype are filled in; the rest stay zero.  (A union
 * would save a few bytes but complicate the code for no real gain.)
 */
struct parsed_fsid {
        int fsidtype;           /* the fsid type that was parsed */
        unsigned inode;         /* inode number, for the types carrying one */
        unsigned minor;         /* device minor, for the device-based types */
        unsigned major;         /* device major, for the device-based types */
        unsigned fsidnum;       /* fsid number, for FSID_NUM */
        size_t uuidlen;         /* number of bytes at fhuuid; 0 if no uuid */
        char *fhuuid;           /* points into the caller's fsid buffer */
};
394
395 static int parse_fsid(int fsidtype, int fsidlen, char *fsid,
396                 struct parsed_fsid *parsed)
397 {
398         unsigned int dev;
399         unsigned long long inode64;
400
401         memset(parsed, 0, sizeof(*parsed));
402         parsed->fsidtype = fsidtype;
403         switch(fsidtype) {
404         case FSID_DEV: /* 4 bytes: 2 major, 2 minor, 4 inode */
405                 if (fsidlen != 8)
406                         return -1;
407                 memcpy(&dev, fsid, 4);
408                 memcpy(&parsed->inode, fsid+4, 4);
409                 parsed->major = ntohl(dev)>>16;
410                 parsed->minor = ntohl(dev) & 0xFFFF;
411                 break;
412
413         case FSID_NUM: /* 4 bytes - fsid */
414                 if (fsidlen != 4)
415                         return -1;
416                 memcpy(&parsed->fsidnum, fsid, 4);
417                 break;
418
419         case FSID_MAJOR_MINOR: /* 12 bytes: 4 major, 4 minor, 4 inode 
420                  * This format is never actually used but was
421                  * an historical accident
422                  */
423                 if (fsidlen != 12)
424                         return -1;
425                 memcpy(&dev, fsid, 4);
426                 parsed->major = ntohl(dev);
427                 memcpy(&dev, fsid+4, 4);
428                 parsed->minor = ntohl(dev);
429                 memcpy(&parsed->inode, fsid+8, 4);
430                 break;
431
432         case FSID_ENCODE_DEV: /* 8 bytes: 4 byte packed device number, 4 inode */
433                 /* This is *host* endian, not net-byte-order, because
434                  * no-one outside this host has any business interpreting it
435                  */
436                 if (fsidlen != 8)
437                         return -1;
438                 memcpy(&dev, fsid, 4);
439                 memcpy(&parsed->inode, fsid+4, 4);
440                 parsed->major = (dev & 0xfff00) >> 8;
441                 parsed->minor = (dev & 0xff) | ((dev >> 12) & 0xfff00);
442                 break;
443
444         case FSID_UUID4_INUM: /* 4 byte inode number and 4 byte uuid */
445                 if (fsidlen != 8)
446                         return -1;
447                 memcpy(&parsed->inode, fsid, 4);
448                 parsed->uuidlen = 4;
449                 parsed->fhuuid = fsid+4;
450                 break;
451         case FSID_UUID8: /* 8 byte uuid */
452                 if (fsidlen != 8)
453                         return -1;
454                 parsed->uuidlen = 8;
455                 parsed->fhuuid = fsid;
456                 break;
457         case FSID_UUID16: /* 16 byte uuid */
458                 if (fsidlen != 16)
459                         return -1;
460                 parsed->uuidlen = 16;
461                 parsed->fhuuid = fsid;
462                 break;
463         case FSID_UUID16_INUM: /* 8 byte inode number and 16 byte uuid */
464                 if (fsidlen != 24)
465                         return -1;
466                 memcpy(&inode64, fsid, 8);
467                 parsed->inode = inode64;
468                 parsed->uuidlen = 16;
469                 parsed->fhuuid = fsid+8;
470                 break;
471         }
472         return 0;
473 }
474
475 static bool match_fsid(struct parsed_fsid *parsed, nfs_export *exp, char *path)
476 {
477         struct stat stb;
478         int type;
479         char u[16];
480
481         if (stat(path, &stb) != 0)
482                 return false;
483         if (!S_ISDIR(stb.st_mode) && !S_ISREG(stb.st_mode))
484                 return false;
485
486         switch (parsed->fsidtype) {
487         case FSID_DEV:
488         case FSID_MAJOR_MINOR:
489         case FSID_ENCODE_DEV:
490                 if (stb.st_ino != parsed->inode)
491                         return false;
492                 if (parsed->major != major(stb.st_dev) ||
493                     parsed->minor != minor(stb.st_dev))
494                         return false;
495                 return true;
496         case FSID_NUM:
497                 if (((exp->m_export.e_flags & NFSEXP_FSID) == 0 ||
498                      exp->m_export.e_fsid != parsed->fsidnum))
499                         return false;
500                 return true;
501         case FSID_UUID4_INUM:
502         case FSID_UUID16_INUM:
503                 if (stb.st_ino != parsed->inode)
504                         return false;
505                 goto check_uuid;
506         case FSID_UUID8:
507         case FSID_UUID16:
508                 if (!is_mountpoint(path))
509                         return false;
510         check_uuid:
511                 if (exp->m_export.e_uuid)
512                         get_uuid(exp->m_export.e_uuid, parsed->uuidlen, u);
513                 else
514                         for (type = 0;
515                              uuid_by_path(path, type, parsed->uuidlen, u);
516                              type++)
517                                 if (memcmp(u, parsed->fhuuid, parsed->uuidlen) == 0)
518                                         return true;
519
520                 if (memcmp(u, parsed->fhuuid, parsed->uuidlen) != 0)
521                         return false;
522                 return true;
523         }
524         /* Well, unreachable, actually: */
525         return false;
526 }
527
528 static struct addrinfo *lookup_client_addr(char *dom)
529 {
530         struct addrinfo *ret;
531         struct addrinfo *tmp;
532
533         dom++; /* skip initial "$" */
534
535         tmp = host_pton(dom);
536         if (tmp == NULL)
537                 return NULL;
538         ret = client_resolve(tmp->ai_addr);
539         freeaddrinfo(tmp);
540         return ret;
541 }
542
543 static void nfsd_fh(FILE *f)
544 {
545         /* request are:
546          *  domain fsidtype fsid
547          * interpret fsid, find export point and options, and write:
548          *  domain fsidtype fsid expiry path
549          */
550         char *cp;
551         char *dom;
552         int fsidtype;
553         int fsidlen;
554         char fsid[32];
555         struct parsed_fsid parsed;
556         struct exportent *found = NULL;
557         struct addrinfo *ai = NULL;
558         char *found_path = NULL;
559         nfs_export *exp;
560         int i;
561         int dev_missing = 0;
562
563         if (readline(fileno(f), &lbuf, &lbuflen) != 1)
564                 return;
565
566         xlog(D_CALL, "nfsd_fh: inbuf '%s'", lbuf);
567
568         cp = lbuf;
569
570         dom = malloc(strlen(cp));
571         if (dom == NULL)
572                 return;
573         if (qword_get(&cp, dom, strlen(cp)) <= 0)
574                 goto out;
575         if (qword_get_int(&cp, &fsidtype) != 0)
576                 goto out;
577         if (fsidtype < 0 || fsidtype > 7)
578                 goto out; /* unknown type */
579         if ((fsidlen = qword_get(&cp, fsid, 32)) <= 0)
580                 goto out;
581         if (parse_fsid(fsidtype, fsidlen, fsid, &parsed))
582                 goto out;
583
584         auth_reload();
585
586         if (is_ipaddr_client(dom)) {
587                 ai = lookup_client_addr(dom);
588                 if (!ai)
589                         goto out;
590         }
591
592         /* Now determine export point for this fsid/domain */
593         for (i=0 ; i < MCL_MAXTYPES; i++) {
594                 nfs_export *next_exp;
595                 for (exp = exportlist[i].p_head; exp; exp = next_exp) {
596                         char *path;
597
598                         if (exp->m_export.e_flags & NFSEXP_CROSSMOUNT) {
599                                 static nfs_export *prev = NULL;
600                                 static void *mnt = NULL;
601                                 
602                                 if (prev == exp) {
603                                         /* try a submount */
604                                         path = next_mnt(&mnt, exp->m_export.e_path);
605                                         if (!path) {
606                                                 next_exp = exp->m_next;
607                                                 prev = NULL;
608                                                 continue;
609                                         }
610                                         next_exp = exp;
611                                 } else {
612                                         prev = exp;
613                                         mnt = NULL;
614                                         path = exp->m_export.e_path;
615                                         next_exp = exp;
616                                 }
617                         } else {
618                                 path = exp->m_export.e_path;
619                                 next_exp = exp->m_next;
620                         }
621
622                         if (!is_ipaddr_client(dom)
623                                         && !namelist_client_matches(exp, dom))
624                                 continue;
625                         if (exp->m_export.e_mountpoint &&
626                             !is_mountpoint(exp->m_export.e_mountpoint[0]?
627                                            exp->m_export.e_mountpoint:
628                                            exp->m_export.e_path))
629                                 dev_missing ++;
630
631                         if (!match_fsid(&parsed, exp, path))
632                                 continue;
633                         if (is_ipaddr_client(dom)
634                                         && !ipaddr_client_matches(exp, ai))
635                                 continue;
636                         if (!found || subexport(&exp->m_export, found)) {
637                                 found = &exp->m_export;
638                                 free(found_path);
639                                 found_path = strdup(path);
640                                 if (found_path == NULL)
641                                         goto out;
642                         } else if (strcmp(found->e_path, exp->m_export.e_path) != 0
643                                    && !subexport(found, &exp->m_export))
644                         {
645                                 xlog(L_WARNING, "%s and %s have same filehandle for %s, using first",
646                                      found_path, path, dom);
647                         } else {
648                                 /* same path, if one is V4ROOT, choose the other */
649                                 if (found->e_flags & NFSEXP_V4ROOT) {
650                                         found = &exp->m_export;
651                                         free(found_path);
652                                         found_path = strdup(path);
653                                         if (found_path == NULL)
654                                                 goto out;
655                                 }
656                         }
657                 }
658         }
659         if (found && 
660             found->e_mountpoint &&
661             !is_mountpoint(found->e_mountpoint[0]?
662                            found->e_mountpoint:
663                            found->e_path)) {
664                 /* Cannot export this yet 
665                  * should log a warning, but need to rate limit
666                    xlog(L_WARNING, "%s not exported as %d not a mountpoint",
667                    found->e_path, found->e_mountpoint);
668                  */
669                 /* FIXME we need to make sure we re-visit this later */
670                 goto out;
671         }
672         if (!found && dev_missing) {
673                 /* The missing dev could be what we want, so just be
674                  * quite rather than returning stale yet
675                  */
676                 goto out;
677         }
678
679         if (found)
680                 if (cache_export_ent(dom, found, found_path) < 0)
681                         found = 0;
682
683         qword_print(f, dom);
684         qword_printint(f, fsidtype);
685         qword_printhex(f, fsid, fsidlen);
686         /* The fsid -> path lookup can be quite expensive as it
687          * potentially stats and reads lots of devices, and some of those
688          * might have spun-down.  The Answer is not likely to
689          * change underneath us, and an 'exportfs -f' can always
690          * remove this from the kernel, so use a really log
691          * timeout.  Maybe this should be configurable on the command
692          * line.
693          */
694         qword_printint(f, 0x7fffffff);
695         if (found)
696                 qword_print(f, found_path);
697         qword_eol(f);
698  out:
699         if (found_path)
700                 free(found_path);
701         freeaddrinfo(ai);
702         free(dom);
703         xlog(D_CALL, "nfsd_fh: found %p path %s", found, found ? found->e_path : NULL);
704         return;         
705 }
706
707 static void write_fsloc(FILE *f, struct exportent *ep)
708 {
709         struct servers *servers;
710
711         if (ep->e_fslocmethod == FSLOC_NONE)
712                 return;
713
714         servers = replicas_lookup(ep->e_fslocmethod, ep->e_fslocdata);
715         if (!servers)
716                 return;
717         qword_print(f, "fsloc");
718         qword_printint(f, servers->h_num);
719         if (servers->h_num >= 0) {
720                 int i;
721                 for (i=0; i<servers->h_num; i++) {
722                         qword_print(f, servers->h_mp[i]->h_host);
723                         qword_print(f, servers->h_mp[i]->h_path);
724                 }
725         }
726         qword_printint(f, servers->h_referral);
727         release_replicas(servers);
728 }
729
730 static void write_secinfo(FILE *f, struct exportent *ep, int flag_mask)
731 {
732         struct sec_entry *p;
733
734         for (p = ep->e_secinfo; p->flav; p++)
735                 ; /* Do nothing */
736         if (p == ep->e_secinfo) {
737                 /* There was no sec= option */
738                 return;
739         }
740         qword_print(f, "secinfo");
741         qword_printint(f, p - ep->e_secinfo);
742         for (p = ep->e_secinfo; p->flav; p++) {
743                 qword_printint(f, p->flav->fnum);
744                 qword_printint(f, p->flags & flag_mask);
745         }
746
747 }
748
749 static int dump_to_cache(FILE *f, char *domain, char *path, struct exportent *exp)
750 {
751         qword_print(f, domain);
752         qword_print(f, path);
753         if (exp) {
754                 int different_fs = strcmp(path, exp->e_path) != 0;
755                 int flag_mask = different_fs ? ~NFSEXP_FSID : ~0;
756
757                 qword_printtimefrom(f, exp->e_ttl);
758                 qword_printint(f, exp->e_flags & flag_mask);
759                 qword_printint(f, exp->e_anonuid);
760                 qword_printint(f, exp->e_anongid);
761                 qword_printint(f, exp->e_fsid);
762                 write_fsloc(f, exp);
763                 write_secinfo(f, exp, flag_mask);
764                 if (exp->e_uuid == NULL || different_fs) {
765                         char u[16];
766                         if (uuid_by_path(path, 0, 16, u)) {
767                                 qword_print(f, "uuid");
768                                 qword_printhex(f, u, 16);
769                         }
770                 } else {
771                         char u[16];
772                         get_uuid(exp->e_uuid, 16, u);
773                         qword_print(f, "uuid");
774                         qword_printhex(f, u, 16);
775                 }
776         } else
777                 qword_printtimefrom(f, DEFAULT_TTL);
778         return qword_eol(f);
779 }
780
781 static nfs_export *
782 lookup_export(char *dom, char *path, struct addrinfo *ai)
783 {
784         nfs_export *exp;
785         nfs_export *found = NULL;
786         int found_type = 0;
787         int i;
788
789         for (i=0 ; i < MCL_MAXTYPES; i++) {
790                 for (exp = exportlist[i].p_head; exp; exp = exp->m_next) {
791                         if (!export_matches(exp, dom, path, ai))
792                                 continue;
793                         if (!found) {
794                                 found = exp;
795                                 found_type = i;
796                                 continue;
797                         }
798                         /* Always prefer non-V4ROOT exports */
799                         if (exp->m_export.e_flags & NFSEXP_V4ROOT)
800                                 continue;
801                         if (found->m_export.e_flags & NFSEXP_V4ROOT) {
802                                 found = exp;
803                                 found_type = i;
804                                 continue;
805                         }
806
807                         /* If one is a CROSSMOUNT, then prefer the longest path */
808                         if (((found->m_export.e_flags & NFSEXP_CROSSMOUNT) ||
809                              (exp->m_export.e_flags & NFSEXP_CROSSMOUNT)) &&
810                             strlen(found->m_export.e_path) !=
811                             strlen(exp->m_export.e_path)) {
812
813                                 if (strlen(exp->m_export.e_path) >
814                                     strlen(found->m_export.e_path)) {
815                                         found = exp;
816                                         found_type = i;
817                                 }
818                                 continue;
819
820                         } else if (found_type == i && found->m_warned == 0) {
821                                 xlog(L_WARNING, "%s exported to both %s and %s, "
822                                      "arbitrarily choosing options from first",
823                                      path, found->m_client->m_hostname, exp->m_client->m_hostname,
824                                      dom);
825                                 found->m_warned = 1;
826                         }
827                 }
828         }
829         return found;
830 }
831
832 #ifdef HAVE_NFS_PLUGIN_H
833 #include <dlfcn.h>
834 #include <link.h>
835 #include <nfs-plugin.h>
836
837 /*
838  * Find the export entry for the parent of "pathname".
839  * Caller must not free returned exportent.
840  */
841 static struct exportent *lookup_parent_export(char *dom,
842                 const char *pathname, struct addrinfo *ai)
843 {
844         char *parent, *slash;
845         nfs_export *result;
846
847         parent = strdup(pathname);
848         if (parent == NULL) {
849                 xlog(D_GENERAL, "%s: failed to allocate parent path buffer",
850                         __func__);
851                 goto out_default;
852         }
853         xlog(D_CALL, "%s: pathname = '%s'", __func__, pathname);
854
855 again:
856         /* shorten pathname by one component */
857         slash = strrchr(parent, '/');
858         if (slash == NULL) {
859                 xlog(D_GENERAL, "%s: no slash found in pathname",
860                         __func__);
861                 goto out_default;
862         }
863         *slash = '\0';
864
865         if (strlen(parent) == 0) {
866                 result = lookup_export(dom, "/", ai);
867                 if (result == NULL) {
868                         xlog(L_ERROR, "%s: no root export found.", __func__);
869                         goto out_default;
870                 }
871                 goto out;
872         }
873
874         result = lookup_export(dom, parent, ai);
875         if (result == NULL) {
876                 xlog(D_GENERAL, "%s: lookup_export(%s) found nothing",
877                         __func__, parent);
878                 goto again;
879         }
880
881 out:
882         xlog(D_CALL, "%s: found export for %s", __func__, parent);
883         free(parent);
884         return &result->m_export;
885
886 out_default:
887         free(parent);
888         return mkexportent("*", "/", "insecure");
889 }
890
891 /*
892  * Walk through a set of FS locations and build an e_fslocdata string.
893  * Returns true if all went to plan; otherwise, false.
894  */
895 static bool locations_to_fslocdata(struct jp_ops *ops,
896                 nfs_fsloc_set_t locations, char *fslocdata,
897                 size_t remaining, int *ttl)
898 {
899         char *server, *last_path, *rootpath, *ptr;
900         _Bool seen = false;
901
902         last_path = NULL;
903         rootpath = NULL;
904         server = NULL;
905         ptr = fslocdata;
906         *ttl = 0;
907
908         for (;;) {
909                 enum jp_status status;
910                 int len;
911
912                 status = ops->jp_get_next_location(locations, &server,
913                                                         &rootpath, ttl);
914                 if (status == JP_EMPTY)
915                         break;
916                 if (status != JP_OK) {
917                         xlog(D_GENERAL, "%s: failed to parse location: %s",
918                                 __func__, ops->jp_error(status));
919                         goto out_false;
920                 }
921                 xlog(D_GENERAL, "%s: Location: %s:%s",
922                         __func__, server, rootpath);
923
924                 if (last_path && strcmp(rootpath, last_path) == 0) {
925                         len = snprintf(ptr, remaining, "+%s", server);
926                         if (len < 0) {
927                                 xlog(D_GENERAL, "%s: snprintf: %m", __func__);
928                                 goto out_false;
929                         }
930                         if ((size_t)len >= remaining) {
931                                 xlog(D_GENERAL, "%s: fslocdata buffer overflow", __func__);
932                                 goto out_false;
933                         }
934                         remaining -= (size_t)len;
935                         ptr += len;
936                 } else {
937                         if (last_path == NULL)
938                                 len = snprintf(ptr, remaining, "%s@%s",
939                                                         rootpath, server);
940                         else
941                                 len = snprintf(ptr, remaining, ":%s@%s",
942                                                         rootpath, server);
943                         if (len < 0) {
944                                 xlog(D_GENERAL, "%s: snprintf: %m", __func__);
945                                 goto out_false;
946                         }
947                         if ((size_t)len >= remaining) {
948                                 xlog(D_GENERAL, "%s: fslocdata buffer overflow",
949                                         __func__);
950                                 goto out_false;
951                         }
952                         remaining -= (size_t)len;
953                         ptr += len;
954                         last_path = rootpath;
955                 }
956
957                 seen = true;
958                 free(rootpath);
959                 free(server);
960         }
961
962         xlog(D_CALL, "%s: fslocdata='%s', ttl=%d",
963                 __func__, fslocdata, *ttl);
964         return seen;
965
966 out_false:
967         free(rootpath);
968         free(server);
969         return false;
970 }
971
972 /*
973  * Duplicate the junction's parent's export options and graft in
974  * the fslocdata we constructed from the locations list.
975  */
976 static struct exportent *create_junction_exportent(struct exportent *parent,
977                 const char *junction, const char *fslocdata, int ttl)
978 {
979         static struct exportent *eep;
980
981         eep = (struct exportent *)malloc(sizeof(*eep));
982         if (eep == NULL)
983                 goto out_nomem;
984
985         dupexportent(eep, parent);
986         strcpy(eep->e_path, junction);
987         eep->e_hostname = strdup(parent->e_hostname);
988         if (eep->e_hostname == NULL) {
989                 free(eep);
990                 goto out_nomem;
991         }
992         free(eep->e_uuid);
993         eep->e_uuid = NULL;
994         eep->e_ttl = (unsigned int)ttl;
995
996         free(eep->e_fslocdata);
997         eep->e_fslocdata = strdup(fslocdata);
998         if (eep->e_fslocdata == NULL) {
999                 free(eep->e_hostname);
1000                 free(eep);
1001                 goto out_nomem;
1002         }
1003         eep->e_fslocmethod = FSLOC_REFER;
1004         return eep;
1005
1006 out_nomem:
1007         xlog(L_ERROR, "%s: No memory", __func__);
1008         return NULL;
1009 }
1010
1011 /*
1012  * Walk through the set of FS locations and build an exportent.
1013  * Returns pointer to an exportent if "junction" refers to a junction.
1014  */
1015 static struct exportent *locations_to_export(struct jp_ops *ops,
1016                 nfs_fsloc_set_t locations, const char *junction,
1017                 struct exportent *parent)
1018 {
1019         static char fslocdata[BUFSIZ];
1020         int ttl;
1021
1022         fslocdata[0] = '\0';
1023         if (!locations_to_fslocdata(ops, locations,
1024                                         fslocdata, sizeof(fslocdata), &ttl))
1025                 return NULL;
1026         return create_junction_exportent(parent, junction, fslocdata, ttl);
1027 }
1028
1029 /*
1030  * Retrieve locations information in "junction" and dump it to the
1031  * kernel.  Returns pointer to an exportent if "junction" refers
1032  * to a junction.
1033  */
1034 static struct exportent *invoke_junction_ops(void *handle, char *dom,
1035                 const char *junction, struct addrinfo *ai)
1036 {
1037         struct exportent *parent, *exp = NULL;
1038         nfs_fsloc_set_t locations;
1039         enum jp_status status;
1040         struct jp_ops *ops;
1041         char *error;
1042
1043         ops = (struct jp_ops *)dlsym(handle, "nfs_junction_ops");
1044         error = dlerror();
1045         if (error != NULL) {
1046                 xlog(D_GENERAL, "%s: dlsym(jp_junction_ops): %s",
1047                         __func__, error);
1048                 return NULL;
1049         }
1050         if (ops->jp_api_version != JP_API_VERSION) {
1051                 xlog(D_GENERAL, "%s: unrecognized junction API version: %u",
1052                         __func__, ops->jp_api_version);
1053                 return NULL;
1054         }
1055
1056         status = ops->jp_init(false);
1057         if (status != JP_OK) {
1058                 xlog(D_GENERAL, "%s: failed to resolve %s: %s",
1059                         __func__, junction, ops->jp_error(status));
1060                 return NULL;
1061         }
1062
1063         status = ops->jp_get_locations(junction, &locations);
1064         switch (status) {
1065         case JP_OK:
1066                 break;
1067         case JP_NOTJUNCTION:
1068                 xlog(D_GENERAL, "%s: %s is not a junction",
1069                         __func__, junction);
1070                 goto out;
1071         default:
1072                 xlog(L_WARNING, "Dangling junction %s: %s",
1073                         junction, ops->jp_error(status));
1074                 goto out;
1075         }
1076
1077         parent = lookup_parent_export(dom, junction, ai);
1078         if (parent == NULL)
1079                 goto out;
1080
1081         exp = locations_to_export(ops, locations, junction, parent);
1082
1083         ops->jp_put_locations(locations);
1084
1085 out:
1086         ops->jp_done();
1087         return exp;
1088 }
1089
1090 /*
1091  * Load the junction plug-in, then try to resolve "pathname".
1092  * Returns pointer to an initialized exportent if "junction"
1093  * refers to a junction, or NULL if not.
1094  */
1095 static struct exportent *lookup_junction(char *dom, const char *pathname,
1096                 struct addrinfo *ai)
1097 {
1098         struct exportent *exp;
1099         struct link_map *map;
1100         void *handle;
1101
1102         handle = dlopen("libnfsjunct.so", RTLD_NOW);
1103         if (handle == NULL) {
1104                 xlog(D_GENERAL, "%s: dlopen: %s", __func__, dlerror());
1105                 return NULL;
1106         }
1107
1108         if (dlinfo(handle, RTLD_DI_LINKMAP, &map) == 0)
1109                 xlog(D_GENERAL, "%s: loaded plug-in %s",
1110                         __func__, map->l_name);
1111
1112         (void)dlerror();        /* Clear any error */
1113
1114         exp = invoke_junction_ops(handle, dom, pathname, ai);
1115
1116         /* We could leave it loaded to make junction resolution
1117          * faster next time.  However, if we want to replace the
1118          * library, that would require restarting mountd. */
1119         (void)dlclose(handle);
1120         return exp;
1121 }
1122
/*
 * Answer an export request for a path that matched no export.
 *
 * The path is tried as a junction; whatever that yields (possibly
 * NULL) is passed to dump_to_cache(), and a non-NULL junction entry
 * is released and freed afterwards.
 */
static void lookup_nonexport(FILE *f, char *dom, char *path,
                struct addrinfo *ai)
{
        struct exportent *junction_exp = lookup_junction(dom, path, ai);

        dump_to_cache(f, dom, path, junction_exp);

        if (junction_exp != NULL) {
                exportent_release(junction_exp);
                free(junction_exp);
        }
}
1135 #else   /* !HAVE_NFS_PLUGIN_H */
/*
 * Answer an export request for a path that matched no export.
 * Built without junction plug-in support: a NULL entry is always
 * passed to dump_to_cache() and the client address is not used.
 */
static void lookup_nonexport(FILE *f, char *dom, char *path,
                struct addrinfo *UNUSED(ai))
{
        dump_to_cache(f, dom, path, NULL);
}
1141 #endif  /* !HAVE_NFS_PLUGIN_H */
1142
1143 static void nfsd_export(FILE *f)
1144 {
1145         /* requests are:
1146          *  domain path
1147          * determine export options and return:
1148          *  domain path expiry flags anonuid anongid fsid
1149          */
1150
1151         char *cp;
1152         char *dom, *path;
1153         nfs_export *found = NULL;
1154         struct addrinfo *ai = NULL;
1155
1156         if (readline(fileno(f), &lbuf, &lbuflen) != 1)
1157                 return;
1158
1159         xlog(D_CALL, "nfsd_export: inbuf '%s'", lbuf);
1160
1161         cp = lbuf;
1162         dom = malloc(strlen(cp));
1163         path = malloc(strlen(cp));
1164
1165         if (!dom || !path)
1166                 goto out;
1167
1168         if (qword_get(&cp, dom, strlen(lbuf)) <= 0)
1169                 goto out;
1170         if (qword_get(&cp, path, strlen(lbuf)) <= 0)
1171                 goto out;
1172
1173         auth_reload();
1174
1175         if (is_ipaddr_client(dom)) {
1176                 ai = lookup_client_addr(dom);
1177                 if (!ai)
1178                         goto out;
1179         }
1180
1181         found = lookup_export(dom, path, ai);
1182
1183         if (found) {
1184                 if (dump_to_cache(f, dom, path, &found->m_export) < 0) {
1185                         xlog(L_WARNING,
1186                              "Cannot export %s, possibly unsupported filesystem"
1187                              " or fsid= required", path);
1188                         dump_to_cache(f, dom, path, NULL);
1189                 }
1190         } else
1191                 lookup_nonexport(f, dom, path, ai);
1192
1193  out:
1194         xlog(D_CALL, "nfsd_export: found %p path %s", found, path ? path : NULL);
1195         if (dom) free(dom);
1196         if (path) free(path);
1197         freeaddrinfo(ai);
1198 }
1199
1200
1201 struct {
1202         char *cache_name;
1203         void (*cache_handle)(FILE *f);
1204         FILE *f;
1205         char vbuf[RPC_CHAN_BUF_SIZE];
1206 } cachelist[] = {
1207         { "auth.unix.ip", auth_unix_ip, NULL, ""},
1208         { "auth.unix.gid", auth_unix_gid, NULL, ""},
1209         { "nfsd.export", nfsd_export, NULL, ""},
1210         { "nfsd.fh", nfsd_fh, NULL, ""},
1211         { NULL, NULL, NULL, ""}
1212 };
1213
1214 extern int manage_gids;
1215
1216 /**
1217  * cache_open - prepare communications channels with kernel RPC caches
1218  *
1219  */
1220 void cache_open(void) 
1221 {
1222         int i;
1223         for (i=0; cachelist[i].cache_name; i++ ) {
1224                 char path[100];
1225                 if (!manage_gids && cachelist[i].cache_handle == auth_unix_gid)
1226                         continue;
1227                 sprintf(path, "/proc/net/rpc/%s/channel", cachelist[i].cache_name);
1228                 cachelist[i].f = fopen(path, "r+");
1229                 if (cachelist[i].f != NULL) {
1230                         setvbuf(cachelist[i].f, cachelist[i].vbuf, _IOLBF, 
1231                                 RPC_CHAN_BUF_SIZE);
1232                 }
1233         }
1234 }
1235
1236 /**
1237  * cache_set_fds - prepare cache file descriptors for one iteration of the service loop
1238  * @fdset: pointer to fd_set to prepare
1239  */
1240 void cache_set_fds(fd_set *fdset)
1241 {
1242         int i;
1243         for (i=0; cachelist[i].cache_name; i++) {
1244                 if (cachelist[i].f)
1245                         FD_SET(fileno(cachelist[i].f), fdset);
1246         }
1247 }
1248
1249 /**
1250  * cache_process_req - process any active cache file descriptors during service loop iteration
1251  * @fdset: pointer to fd_set to examine for activity
1252  */
1253 int cache_process_req(fd_set *readfds) 
1254 {
1255         int i;
1256         int cnt = 0;
1257         for (i=0; cachelist[i].cache_name; i++) {
1258                 if (cachelist[i].f != NULL &&
1259                     FD_ISSET(fileno(cachelist[i].f), readfds)) {
1260                         cnt++;
1261                         cachelist[i].cache_handle(cachelist[i].f);
1262                         FD_CLR(fileno(cachelist[i].f), readfds);
1263                 }
1264         }
1265         return cnt;
1266 }
1267
1268
1269 /*
1270  * Give IP->domain and domain+path->options to kernel
1271  * % echo nfsd $IP  $[now+DEFAULT_TTL] $domain > /proc/net/rpc/auth.unix.ip/channel
1272  * % echo $domain $path $[now+DEFAULT_TTL] $options $anonuid $anongid $fsid > /proc/net/rpc/nfsd.export/channel
1273  */
1274
1275 static int cache_export_ent(char *domain, struct exportent *exp, char *path)
1276 {
1277         int err;
1278         FILE *f = fopen("/proc/net/rpc/nfsd.export/channel", "w");
1279         if (!f)
1280                 return -1;
1281
1282         err = dump_to_cache(f, domain, exp->e_path, exp);
1283         if (err) {
1284                 xlog(L_WARNING,
1285                      "Cannot export %s, possibly unsupported filesystem or"
1286                      " fsid= required", exp->e_path);
1287         }
1288
1289         while (err == 0 && (exp->e_flags & NFSEXP_CROSSMOUNT) && path) {
1290                 /* really an 'if', but we can break out of
1291                  * a 'while' more easily */
1292                 /* Look along 'path' for other filesystems
1293                  * and export them with the same options
1294                  */
1295                 struct stat stb;
1296                 size_t l = strlen(exp->e_path);
1297                 __dev_t dev;
1298
1299                 if (strlen(path) <= l || path[l] != '/' ||
1300                     strncmp(exp->e_path, path, l) != 0)
1301                         break;
1302                 if (stat(exp->e_path, &stb) != 0)
1303                         break;
1304                 dev = stb.st_dev;
1305                 while(path[l] == '/') {
1306                         char c;
1307                         /* errors for submount should fail whole filesystem */
1308                         int err2;
1309
1310                         l++;
1311                         while (path[l] != '/' && path[l])
1312                                 l++;
1313                         c = path[l];
1314                         path[l] = 0;
1315                         err2 = lstat(path, &stb);
1316                         path[l] = c;
1317                         if (err2 < 0)
1318                                 break;
1319                         if (stb.st_dev == dev)
1320                                 continue;
1321                         dev = stb.st_dev;
1322                         path[l] = 0;
1323                         dump_to_cache(f, domain, path, exp);
1324                         path[l] = c;
1325                 }
1326                 break;
1327         }
1328
1329         fclose(f);
1330         return err;
1331 }
1332
1333 /**
1334  * cache_export - Inform kernel of a new nfs_export
1335  * @exp: target nfs_export
1336  * @path: NUL-terminated C string containing export path
1337  */
1338 int cache_export(nfs_export *exp, char *path)
1339 {
1340         char buf[INET6_ADDRSTRLEN];
1341         int err;
1342         FILE *f;
1343
1344         f = fopen("/proc/net/rpc/auth.unix.ip/channel", "w");
1345         if (!f)
1346                 return -1;
1347
1348
1349         qword_print(f, "nfsd");
1350         qword_print(f,
1351                 host_ntop(get_addrlist(exp->m_client, 0), buf, sizeof(buf)));
1352         qword_printtimefrom(f, exp->m_export.e_ttl);
1353         qword_print(f, exp->m_client->m_hostname);
1354         err = qword_eol(f);
1355         
1356         fclose(f);
1357
1358         err = cache_export_ent(exp->m_client->m_hostname, &exp->m_export, path)
1359                 || err;
1360         return err;
1361 }
1362
1363 /**
1364  * cache_get_filehandle - given an nfs_export, get its root filehandle
1365  * @exp: target nfs_export
1366  * @len: length of requested file handle
1367  * @p: NUL-terminated C string containing export path
1368  *
1369  * Returns pointer to NFS file handle of root directory of export
1370  *
1371  * { 
1372  *   echo $domain $path $length 
1373  *   read filehandle <&0
1374  * } <> /proc/fs/nfsd/filehandle
1375  */
1376 struct nfs_fh_len *
1377 cache_get_filehandle(nfs_export *exp, int len, char *p)
1378 {
1379         FILE *f = fopen("/proc/fs/nfsd/filehandle", "r+");
1380         char buf[200];
1381         char *bp = buf;
1382         int failed;
1383         static struct nfs_fh_len fh;
1384
1385         if (!f)
1386                 f = fopen("/proc/fs/nfs/filehandle", "r+");
1387         if (!f)
1388                 return NULL;
1389
1390         qword_print(f, exp->m_client->m_hostname);
1391         qword_print(f, p);
1392         qword_printint(f, len); 
1393         failed = qword_eol(f);
1394         
1395         if (!failed)
1396                 failed = (fgets(buf, sizeof(buf), f) == NULL);
1397         fclose(f);
1398         if (failed)
1399                 return NULL;
1400         memset(fh.fh_handle, 0, sizeof(fh.fh_handle));
1401         fh.fh_size = qword_get(&bp, (char *)fh.fh_handle, NFS3_FHSIZE);
1402         return &fh;
1403 }