]> git.decadent.org.uk Git - nfs-utils.git/blob - utils/blkmapd/device-discovery.c
blkmapd: proper signal handling
[nfs-utils.git] / utils / blkmapd / device-discovery.c
1 /*
2  * device-discovery.c: main function, discovering device and processing
3  * pipe request from kernel.
4  *
5  * Copyright (c) 2010 EMC Corporation, Haiying Tang <Tang_Haiying@emc.com>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28
29 #include <sys/types.h>
30 #include <sys/stat.h>
31 #include <sys/ioctl.h>
32 #include <sys/mount.h>
33 #include <sys/select.h>
34 #include <sys/inotify.h>
35 #include <linux/kdev_t.h>
36 #include <scsi/scsi.h>
37 #include <scsi/scsi_ioctl.h>
38 #include <scsi/sg.h>
39 #include <signal.h>
40
41 #include <stdlib.h>
42 #include <stdio.h>
43 #include <string.h>
44 #include <syslog.h>
45 #include <dirent.h>
46 #include <ctype.h>
47 #include <fcntl.h>
48 #include <unistd.h>
49 #include <libgen.h>
50 #include <errno.h>
51 #include <libdevmapper.h>
52
53 #include "device-discovery.h"
54
/* Size of one inotify event record, not counting its trailing name. */
#define EVENT_SIZE (sizeof(struct inotify_event))
/* Number of bytes requested per read() of the inotify descriptor. */
#define EVENT_BUFSIZE (1024 * EVENT_SIZE)

/* rpc_pipefs locations watched/opened by this daemon, outermost first. */
#define RPCPIPE_DIR     "/var/lib/nfs/rpc_pipefs"
#define NFSPIPE_DIR     "/var/lib/nfs/rpc_pipefs/nfs"
#define BL_PIPE_FILE    "/var/lib/nfs/rpc_pipefs/nfs/blocklayout"
/* Pid file created (and locked) when running as a daemon. */
#define PID_FILE        "/var/run/blkmapd.pid"

/* Head of the list of discovered disks; rebuilt by bl_discover_devices(). */
struct bl_disk *visible_disk_list;

/* inotify descriptor that carries both directory watches below. */
int    bl_watch_fd;
/* Descriptor of the opened BL_PIPE_FILE; set to -1 when the pipe is gone. */
int    bl_pipe_fd;
/* Watch descriptor for NFSPIPE_DIR. */
int    nfs_pipedir_wfd;
/* Watch descriptor for RPCPIPE_DIR. */
int    rpc_pipedir_wfd;
/* Descriptor of PID_FILE, or -1 when no pid file is held. */
int    pidfd = -1;
66
67 struct bl_disk_path *bl_get_path(const char *filepath,
68                                  struct bl_disk_path *paths)
69 {
70         struct bl_disk_path *tmp = paths;
71
72         while (tmp) {
73                 if (!strcmp(tmp->full_path, filepath))
74                         break;
75                 tmp = tmp->next;
76         }
77         return tmp;
78 }
79
80 /* Check whether valid_path is a substring(partition) of path */
81 int bl_is_partition(struct bl_disk_path *valid_path, struct bl_disk_path *path)
82 {
83         if (!strncmp(valid_path->full_path, path->full_path,
84                      strlen(valid_path->full_path)))
85                 return 1;
86
87         return 0;
88 }
89
90 /*
91  * For multipath devices, devices state could be PASSIVE/ACTIVE/PSEUDO,
92  * where PSEUDO > ACTIVE > PASSIVE. Device with highest state is used to
93  * create pseudo device. So if state is higher, the device path needs to
94  * be updated.
95  * If device-mapper multipath support is a must, pseudo devices should
96  * exist for each multipath device. If not, active device path will be
97  * chosen for device creation.
98  * Treat partition as invalid path.
99  */
100 int bl_update_path(struct bl_disk_path *path, enum bl_path_state_e state,
101                    struct bl_disk *disk)
102 {
103         struct bl_disk_path *valid_path = disk->valid_path;
104
105         if (valid_path) {
106                 if (valid_path->state >= state) {
107                         if (bl_is_partition(valid_path, path))
108                                 return 0;
109                 }
110         }
111         return 1;
112 }
113
114 void bl_release_disk(void)
115 {
116         struct bl_disk *disk;
117         struct bl_disk_path *path = NULL;
118
119         while (visible_disk_list) {
120                 disk = visible_disk_list;
121                 path = disk->paths;
122                 while (path) {
123                         disk->paths = path->next;
124                         free(path->full_path);
125                         free(path);
126                         path = disk->paths;
127                 }
128                 if (disk->serial)
129                         free(disk->serial);
130                 visible_disk_list = disk->next;
131                 free(disk);
132         }
133 }
134
135 void bl_add_disk(char *filepath)
136 {
137         struct bl_disk *disk = NULL;
138         int fd = 0;
139         struct stat sb;
140         off_t size = 0;
141         struct bl_serial *serial = NULL;
142         enum bl_path_state_e ap_state;
143         struct bl_disk_path *diskpath = NULL, *path = NULL;
144         dev_t dev;
145
146         fd = open(filepath, O_RDONLY | O_LARGEFILE);
147         if (fd < 0)
148                 return;
149
150         if (fstat(fd, &sb)) {
151                 close(fd);
152                 return;
153         }
154
155         if (!sb.st_size)
156                 ioctl(fd, BLKGETSIZE, &size);
157         else
158                 size = sb.st_size;
159
160         if (!size) {
161                 close(fd);
162                 return;
163         }
164
165         dev = sb.st_rdev;
166         serial = bldev_read_serial(fd, filepath);
167         if (dm_is_dm_major(major(dev)))
168                 ap_state = BL_PATH_STATE_PSEUDO;
169         else
170                 ap_state = bldev_read_ap_state(fd);
171         close(fd);
172
173         if (ap_state != BL_PATH_STATE_ACTIVE)
174                 return;
175
176         for (disk = visible_disk_list; disk != NULL; disk = disk->next) {
177                 /* Already scanned or a partition?
178                  * XXX: if released each time, maybe not need to compare
179                  */
180                 if ((serial->len == disk->serial->len) &&
181                     !memcmp(serial->data, disk->serial->data, serial->len)) {
182                         diskpath = bl_get_path(filepath, disk->paths);
183                         break;
184                 }
185         }
186
187         if (disk && diskpath)
188                 return;
189
190         /* add path */
191         path = malloc(sizeof(struct bl_disk_path));
192         if (!path) {
193                 BL_LOG_ERR("%s: Out of memory!\n", __func__);
194                 goto out_err;
195         }
196         path->next = NULL;
197         path->state = ap_state;
198         path->full_path = strdup(filepath);
199         if (!path->full_path)
200                 goto out_err;
201
202         if (!disk) {            /* add disk */
203                 disk = malloc(sizeof(struct bl_disk));
204                 if (!disk) {
205                         BL_LOG_ERR("%s: Out of memory!\n", __func__);
206                         goto out_err;
207                 }
208                 disk->next = visible_disk_list;
209                 disk->dev = dev;
210                 disk->size = size;
211                 disk->serial = serial;
212                 disk->valid_path = path;
213                 disk->paths = path;
214                 visible_disk_list = disk;
215         } else {
216                 path->next = disk->paths;
217                 disk->paths = path;
218                 /* check whether we need to update disk info */
219                 if (bl_update_path(path, path->state, disk)) {
220                         disk->dev = dev;
221                         disk->size = size;
222                         disk->valid_path = path;
223                 }
224         }
225         return;
226
227  out_err:
228         if (path) {
229                 if (path->full_path)
230                         free(path->full_path);
231                 free(path);
232         }
233         return;
234 }
235
236 int bl_discover_devices(void)
237 {
238         FILE *f;
239         int n;
240         char buf[PATH_MAX], devname[PATH_MAX], fulldevname[PATH_MAX];
241
242         /* release previous list */
243         bl_release_disk();
244
245         /* scan all block devices */
246         f = fopen("/proc/partitions", "r");
247         if (f == NULL)
248                 return 0;
249
250         while (1) {
251                 if (fgets(buf, sizeof buf, f) == NULL)
252                         break;
253                 n = sscanf(buf, "%*d %*d %*d %31s", devname);
254                 if (n != 1)
255                         continue;
256                 snprintf(fulldevname, sizeof fulldevname, "/sys/block/%s",
257                          devname);
258                 if (access(fulldevname, F_OK) < 0)
259                         continue;
260                 snprintf(fulldevname, sizeof fulldevname, "/dev/%s", devname);
261                 bl_add_disk(fulldevname);
262         }
263
264         fclose(f);
265
266         return 0;
267 }
268
269 /* process kernel request
270  * return 0: request processed, and no more request waiting;
271  * return 1: request processed, and more requests waiting;
272  * return < 0: error
273  */
274 static int bl_disk_inquiry_process(int fd)
275 {
276         int ret = 0;
277         struct bl_pipemsg_hdr head;
278         char *buf = NULL;
279         uint32_t major, minor;
280         uint16_t buflen;
281         struct bl_dev_msg reply;
282
283         /* read request */
284         if (atomicio(read, fd, &head, sizeof(head)) != sizeof(head)) {
285                 /* Note that an error in this or the next read is pretty
286                  * catastrophic, as there is no good way to resync into
287                  * the pipe's stream.
288                  */
289                 BL_LOG_ERR("Read pipefs head error!\n");
290                 ret = -EIO;
291                 goto out;
292         }
293
294         buflen = head.totallen;
295         buf = malloc(buflen);
296         if (!buf) {
297                 BL_LOG_ERR("%s: Out of memory!\n", __func__);
298                 ret = -ENOMEM;
299                 goto out;
300         }
301
302         if (atomicio(read, fd, buf, buflen) != buflen) {
303                 BL_LOG_ERR("Read pipefs content error!\n");
304                 ret = -EIO;
305                 goto out;
306         }
307
308         reply.status = BL_DEVICE_REQUEST_PROC;
309
310         switch (head.type) {
311         case BL_DEVICE_MOUNT:
312                 /*
313                  * It shouldn't be necessary to discover devices here, since
314                  * process_deviceinfo() will re-discover if it can't find
315                  * the devices it needs.  But in the case of multipath
316                  * devices (ones that appear more than once, for example an
317                  * active and a standby LUN), this will re-order them in the
318                  * correct priority.
319                  */
320                 bl_discover_devices();
321                 if (!process_deviceinfo(buf, buflen, &major, &minor)) {
322                         reply.status = BL_DEVICE_REQUEST_ERR;
323                         break;
324                 }
325                 reply.major = major;
326                 reply.minor = minor;
327                 break;
328         case BL_DEVICE_UMOUNT:
329                 if (!dm_device_remove_all((uint64_t *) buf))
330                         reply.status = BL_DEVICE_REQUEST_ERR;
331                 break;
332         default:
333                 reply.status = BL_DEVICE_REQUEST_ERR;
334                 break;
335         }
336
337         /* write to pipefs */
338         if (atomicio((void *)write, fd, &reply, sizeof(reply))
339             != sizeof(reply)) {
340                 BL_LOG_ERR("Write pipefs error!\n");
341                 ret = -EIO;
342         }
343
344  out:
345         if (buf)
346                 free(buf);
347         return ret;
348 }
349
350 static void bl_watch_dir(const char* dir, int *wd)
351 {
352         *wd = inotify_add_watch(bl_watch_fd, dir, IN_CREATE|IN_DELETE);
353         if (*wd < 0)
354                 BL_LOG_ERR("failed to watch %s: %s\n", dir, strerror(errno));
355 }
356
357 static void bl_rpcpipe_cb(void)
358 {
359         int rc, curr_byte = 0;
360         char eventArr[EVENT_BUFSIZE];
361         struct inotify_event *event;
362
363         rc = read(bl_watch_fd, &eventArr, EVENT_BUFSIZE);
364         if (rc < 0)
365                 BL_LOG_ERR("read event fail: %s", strerror(errno));
366
367         while (rc > curr_byte) {
368                 event = (struct inotify_event *)&eventArr[curr_byte];
369                 curr_byte += EVENT_SIZE + event->len;
370                 if (event->wd == rpc_pipedir_wfd) {
371                         if (strncmp(event->name, "nfs", 3))
372                                 continue;
373                         if (event->mask & IN_CREATE) {
374                                 BL_LOG_WARNING("nfs pipe dir created\n");
375                                 bl_watch_dir(NFSPIPE_DIR, &nfs_pipedir_wfd);
376                                 bl_pipe_fd = open(BL_PIPE_FILE, O_RDWR);
377                         } else if (event->mask & IN_DELETE) {
378                                 BL_LOG_WARNING("nfs pipe dir deleted\n");
379                                 inotify_rm_watch(bl_watch_fd, nfs_pipedir_wfd);
380                                 close(bl_pipe_fd);
381                                 nfs_pipedir_wfd = -1;
382                                 bl_pipe_fd = -1;
383                         }
384                 } else if (event->wd == nfs_pipedir_wfd) {
385                         if (strncmp(event->name, "blocklayout", 11))
386                                 continue;
387                         if (event->mask & IN_CREATE) {
388                                 BL_LOG_WARNING("blocklayout pipe file created\n");
389                                 bl_pipe_fd = open(BL_PIPE_FILE, O_RDWR);
390                                 if (bl_pipe_fd < 0)
391                                         BL_LOG_ERR("open %s failed: %s\n",
392                                                 event->name, strerror(errno));
393                         } else if (event->mask & IN_DELETE) {
394                                 BL_LOG_WARNING("blocklayout pipe file deleted\n");
395                                 close(bl_pipe_fd);
396                                 bl_pipe_fd = -1;
397                         }
398                 }
399         }
400 }
401
402 static int bl_event_helper(void)
403 {
404         fd_set rset;
405         int ret = 0, maxfd;
406
407         for (;;) {
408                 FD_ZERO(&rset);
409                 FD_SET(bl_watch_fd, &rset);
410                 if (bl_pipe_fd > 0)
411                         FD_SET(bl_pipe_fd, &rset);
412                 maxfd = (bl_watch_fd>bl_pipe_fd)?bl_watch_fd:bl_pipe_fd;
413                 switch (select(maxfd + 1, &rset, NULL, NULL, NULL)) {
414                 case -1:
415                         if (errno == EINTR)
416                                 continue;
417                         else {
418                                 ret = -errno;
419                                 goto out;
420                         }
421                 case 0:
422                         goto out;
423                 default:
424                         if (FD_ISSET(bl_watch_fd, &rset))
425                                 bl_rpcpipe_cb();
426                         else if (bl_pipe_fd > 0 && FD_ISSET(bl_pipe_fd, &rset))
427                                 ret = bl_disk_inquiry_process(bl_pipe_fd);
428                         if (ret)
429                                 goto out;
430                 }
431         }
432  out:
433         return ret;
434 }
435
436 void sig_die(int signal)
437 {
438         if (pidfd >= 0) {
439                 close(pidfd);
440                 unlink(PID_FILE);
441         }
442         BL_LOG_ERR("exit on signal(%d)\n", signal);
443         exit(1);
444 }
445
446 /* Daemon */
447 int main(int argc, char **argv)
448 {
449         int opt, dflag = 0, fg = 0, ret = 1;
450         struct stat statbuf;
451         char pidbuf[64];
452
453         while ((opt = getopt(argc, argv, "df")) != -1) {
454                 switch (opt) {
455                 case 'd':
456                         dflag = 1;
457                         break;
458                 case 'f':
459                         fg = 1;
460                         break;
461                 }
462         }
463
464         if (fg) {
465                 openlog("blkmapd", LOG_PERROR, 0);
466         } else {
467                 if (!stat(PID_FILE, &statbuf)) {
468                         fprintf(stderr, "Pid file %s already existed\n", PID_FILE);
469                         exit(1);
470                 }
471
472                 if (daemon(0, 0) != 0) {
473                         fprintf(stderr, "Daemonize failed\n");
474                         exit(1);
475                 }
476
477                 openlog("blkmapd", LOG_PID, 0);
478                 pidfd = open(PID_FILE, O_WRONLY | O_CREAT, 0644);
479                 if (pidfd < 0) {
480                         BL_LOG_ERR("Create pid file %s failed\n", PID_FILE);
481                         exit(1);
482                 }
483
484                 if (lockf(pidfd, F_TLOCK, 0) < 0) {
485                         BL_LOG_ERR("Lock pid file %s failed\n", PID_FILE);
486                         close(pidfd);
487                         exit(1);
488                 }
489                 ftruncate(pidfd, 0);
490                 sprintf(pidbuf, "%d\n", getpid());
491                 write(pidfd, pidbuf, strlen(pidbuf));
492         }
493
494         signal(SIGINT, sig_die);
495         signal(SIGTERM, sig_die);
496         signal(SIGHUP, SIG_IGN);
497
498         if (dflag) {
499                 bl_discover_devices();
500                 exit(0);
501         }
502
503         if ((bl_watch_fd = inotify_init()) < 0) {
504                 BL_LOG_ERR("init inotify failed %s\n", strerror(errno));
505                 exit(1);
506         }
507
508         /* open pipe file */
509         bl_watch_dir(RPCPIPE_DIR, &rpc_pipedir_wfd);
510         bl_watch_dir(NFSPIPE_DIR, &nfs_pipedir_wfd);
511
512         bl_pipe_fd = open(BL_PIPE_FILE, O_RDWR);
513         if (bl_pipe_fd < 0)
514                 BL_LOG_ERR("open pipe file %s failed: %s\n", BL_PIPE_FILE, strerror(errno));
515
516         while (1) {
517                 /* discover device when needed */
518                 bl_discover_devices();
519
520                 ret = bl_event_helper();
521                 if (ret < 0) {
522                         /* what should we do with process error? */
523                         BL_LOG_ERR("inquiry process return %d\n", ret);
524                 }
525         }
526
527         if (pidfd >= 0) {
528                 close(pidfd);
529                 unlink(PID_FILE);
530         }
531
532         exit(ret);
533 }