blkmapd: allow blocklayoutdriver module to load/unload
[nfs-utils.git] / utils / blkmapd / device-discovery.c
1 /*
2  * device-discovery.c: main function, discovering device and processing
3  * pipe request from kernel.
4  *
5  * Copyright (c) 2010 EMC Corporation, Haiying Tang <Tang_Haiying@emc.com>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28
29 #include <sys/types.h>
30 #include <sys/stat.h>
31 #include <sys/ioctl.h>
32 #include <sys/mount.h>
33 #include <sys/select.h>
34 #include <sys/inotify.h>
35 #include <linux/kdev_t.h>
36 #include <scsi/scsi.h>
37 #include <scsi/scsi_ioctl.h>
38 #include <scsi/sg.h>
39
40 #include <stdlib.h>
41 #include <stdio.h>
42 #include <string.h>
43 #include <syslog.h>
44 #include <dirent.h>
45 #include <ctype.h>
46 #include <fcntl.h>
47 #include <unistd.h>
48 #include <libgen.h>
49 #include <errno.h>
50 #include <libdevmapper.h>
51
52 #include "device-discovery.h"
53
/* Size of one inotify event header, and of a buffer holding 1024 of them. */
#define EVENT_SIZE      (sizeof(struct inotify_event))
#define EVENT_BUFSIZE   (EVENT_SIZE * 1024)

/* rpc_pipefs locations, each built from its parent directory. */
#define RPCPIPE_DIR     "/var/lib/nfs/rpc_pipefs"
#define NFSPIPE_DIR     RPCPIPE_DIR "/nfs"
#define BL_PIPE_FILE    NFSPIPE_DIR "/blocklayout"

/* Pid file created by main() when running in the background. */
#define PID_FILE        "/var/run/blkmapd.pid"
61
/* Head of the list of disks built by bl_add_disk(). */
struct bl_disk *visible_disk_list;

/* inotify instance used for the rpc_pipefs directory watches. */
int    bl_watch_fd;
/* Descriptor of the opened BL_PIPE_FILE; set to -1 when the pipe goes away. */
int    bl_pipe_fd;
/* inotify watch descriptor for NFSPIPE_DIR. */
int    nfs_pipedir_wfd;
/* inotify watch descriptor for RPCPIPE_DIR. */
int    rpc_pipedir_wfd;
64
65 struct bl_disk_path *bl_get_path(const char *filepath,
66                                  struct bl_disk_path *paths)
67 {
68         struct bl_disk_path *tmp = paths;
69
70         while (tmp) {
71                 if (!strcmp(tmp->full_path, filepath))
72                         break;
73                 tmp = tmp->next;
74         }
75         return tmp;
76 }
77
78 /* Check whether valid_path is a substring(partition) of path */
79 int bl_is_partition(struct bl_disk_path *valid_path, struct bl_disk_path *path)
80 {
81         if (!strncmp(valid_path->full_path, path->full_path,
82                      strlen(valid_path->full_path)))
83                 return 1;
84
85         return 0;
86 }
87
88 /*
89  * For multipath devices, devices state could be PASSIVE/ACTIVE/PSEUDO,
90  * where PSEUDO > ACTIVE > PASSIVE. Device with highest state is used to
91  * create pseudo device. So if state is higher, the device path needs to
92  * be updated.
93  * If device-mapper multipath support is a must, pseudo devices should
94  * exist for each multipath device. If not, active device path will be
95  * chosen for device creation.
96  * Treat partition as invalid path.
97  */
98 int bl_update_path(struct bl_disk_path *path, enum bl_path_state_e state,
99                    struct bl_disk *disk)
100 {
101         struct bl_disk_path *valid_path = disk->valid_path;
102
103         if (valid_path) {
104                 if (valid_path->state >= state) {
105                         if (bl_is_partition(valid_path, path))
106                                 return 0;
107                 }
108         }
109         return 1;
110 }
111
112 void bl_release_disk(void)
113 {
114         struct bl_disk *disk;
115         struct bl_disk_path *path = NULL;
116
117         while (visible_disk_list) {
118                 disk = visible_disk_list;
119                 path = disk->paths;
120                 while (path) {
121                         disk->paths = path->next;
122                         free(path->full_path);
123                         free(path);
124                         path = disk->paths;
125                 }
126                 if (disk->serial)
127                         free(disk->serial);
128                 visible_disk_list = disk->next;
129                 free(disk);
130         }
131 }
132
133 void bl_add_disk(char *filepath)
134 {
135         struct bl_disk *disk = NULL;
136         int fd = 0;
137         struct stat sb;
138         off_t size = 0;
139         struct bl_serial *serial = NULL;
140         enum bl_path_state_e ap_state;
141         struct bl_disk_path *diskpath = NULL, *path = NULL;
142         dev_t dev;
143
144         fd = open(filepath, O_RDONLY | O_LARGEFILE);
145         if (fd < 0)
146                 return;
147
148         if (fstat(fd, &sb)) {
149                 close(fd);
150                 return;
151         }
152
153         if (!sb.st_size)
154                 ioctl(fd, BLKGETSIZE, &size);
155         else
156                 size = sb.st_size;
157
158         if (!size) {
159                 close(fd);
160                 return;
161         }
162
163         dev = sb.st_rdev;
164         serial = bldev_read_serial(fd, filepath);
165         if (dm_is_dm_major(major(dev)))
166                 ap_state = BL_PATH_STATE_PSEUDO;
167         else
168                 ap_state = bldev_read_ap_state(fd);
169         close(fd);
170
171         if (ap_state != BL_PATH_STATE_ACTIVE)
172                 return;
173
174         for (disk = visible_disk_list; disk != NULL; disk = disk->next) {
175                 /* Already scanned or a partition?
176                  * XXX: if released each time, maybe not need to compare
177                  */
178                 if ((serial->len == disk->serial->len) &&
179                     !memcmp(serial->data, disk->serial->data, serial->len)) {
180                         diskpath = bl_get_path(filepath, disk->paths);
181                         break;
182                 }
183         }
184
185         if (disk && diskpath)
186                 return;
187
188         /* add path */
189         path = malloc(sizeof(struct bl_disk_path));
190         if (!path) {
191                 BL_LOG_ERR("%s: Out of memory!\n", __func__);
192                 goto out_err;
193         }
194         path->next = NULL;
195         path->state = ap_state;
196         path->full_path = strdup(filepath);
197         if (!path->full_path)
198                 goto out_err;
199
200         if (!disk) {            /* add disk */
201                 disk = malloc(sizeof(struct bl_disk));
202                 if (!disk) {
203                         BL_LOG_ERR("%s: Out of memory!\n", __func__);
204                         goto out_err;
205                 }
206                 disk->next = visible_disk_list;
207                 disk->dev = dev;
208                 disk->size = size;
209                 disk->serial = serial;
210                 disk->valid_path = path;
211                 disk->paths = path;
212                 visible_disk_list = disk;
213         } else {
214                 path->next = disk->paths;
215                 disk->paths = path;
216                 /* check whether we need to update disk info */
217                 if (bl_update_path(path, path->state, disk)) {
218                         disk->dev = dev;
219                         disk->size = size;
220                         disk->valid_path = path;
221                 }
222         }
223         return;
224
225  out_err:
226         if (path) {
227                 if (path->full_path)
228                         free(path->full_path);
229                 free(path);
230         }
231         return;
232 }
233
234 int bl_discover_devices(void)
235 {
236         FILE *f;
237         int n;
238         char buf[PATH_MAX], devname[PATH_MAX], fulldevname[PATH_MAX];
239
240         /* release previous list */
241         bl_release_disk();
242
243         /* scan all block devices */
244         f = fopen("/proc/partitions", "r");
245         if (f == NULL)
246                 return 0;
247
248         while (1) {
249                 if (fgets(buf, sizeof buf, f) == NULL)
250                         break;
251                 n = sscanf(buf, "%*d %*d %*d %31s", devname);
252                 if (n != 1)
253                         continue;
254                 snprintf(fulldevname, sizeof fulldevname, "/sys/block/%s",
255                          devname);
256                 if (access(fulldevname, F_OK) < 0)
257                         continue;
258                 snprintf(fulldevname, sizeof fulldevname, "/dev/%s", devname);
259                 bl_add_disk(fulldevname);
260         }
261
262         fclose(f);
263
264         return 0;
265 }
266
267 /* process kernel request
268  * return 0: request processed, and no more request waiting;
269  * return 1: request processed, and more requests waiting;
270  * return < 0: error
271  */
272 static int bl_disk_inquiry_process(int fd)
273 {
274         int ret = 0;
275         struct bl_pipemsg_hdr head;
276         char *buf = NULL;
277         uint32_t major, minor;
278         uint16_t buflen;
279         struct bl_dev_msg reply;
280
281         /* read request */
282         if (atomicio(read, fd, &head, sizeof(head)) != sizeof(head)) {
283                 /* Note that an error in this or the next read is pretty
284                  * catastrophic, as there is no good way to resync into
285                  * the pipe's stream.
286                  */
287                 BL_LOG_ERR("Read pipefs head error!\n");
288                 ret = -EIO;
289                 goto out;
290         }
291
292         buflen = head.totallen;
293         buf = malloc(buflen);
294         if (!buf) {
295                 BL_LOG_ERR("%s: Out of memory!\n", __func__);
296                 ret = -ENOMEM;
297                 goto out;
298         }
299
300         if (atomicio(read, fd, buf, buflen) != buflen) {
301                 BL_LOG_ERR("Read pipefs content error!\n");
302                 ret = -EIO;
303                 goto out;
304         }
305
306         reply.status = BL_DEVICE_REQUEST_PROC;
307
308         switch (head.type) {
309         case BL_DEVICE_MOUNT:
310                 /*
311                  * It shouldn't be necessary to discover devices here, since
312                  * process_deviceinfo() will re-discover if it can't find
313                  * the devices it needs.  But in the case of multipath
314                  * devices (ones that appear more than once, for example an
315                  * active and a standby LUN), this will re-order them in the
316                  * correct priority.
317                  */
318                 bl_discover_devices();
319                 if (!process_deviceinfo(buf, buflen, &major, &minor)) {
320                         reply.status = BL_DEVICE_REQUEST_ERR;
321                         break;
322                 }
323                 reply.major = major;
324                 reply.minor = minor;
325                 break;
326         case BL_DEVICE_UMOUNT:
327                 if (!dm_device_remove_all((uint64_t *) buf))
328                         reply.status = BL_DEVICE_REQUEST_ERR;
329                 break;
330         default:
331                 reply.status = BL_DEVICE_REQUEST_ERR;
332                 break;
333         }
334
335         /* write to pipefs */
336         if (atomicio((void *)write, fd, &reply, sizeof(reply))
337             != sizeof(reply)) {
338                 BL_LOG_ERR("Write pipefs error!\n");
339                 ret = -EIO;
340         }
341
342  out:
343         if (buf)
344                 free(buf);
345         return ret;
346 }
347
348 static void bl_watch_dir(const char* dir, int *wd)
349 {
350         *wd = inotify_add_watch(bl_watch_fd, dir, IN_CREATE|IN_DELETE);
351         if (*wd < 0)
352                 BL_LOG_ERR("failed to watch %s: %s\n", dir, strerror(errno));
353 }
354
355 static void bl_rpcpipe_cb(void)
356 {
357         int rc, curr_byte = 0;
358         char eventArr[EVENT_BUFSIZE];
359         struct inotify_event *event;
360
361         rc = read(bl_watch_fd, &eventArr, EVENT_BUFSIZE);
362         if (rc < 0)
363                 BL_LOG_ERR("read event fail: %s", strerror(errno));
364
365         while (rc > curr_byte) {
366                 event = (struct inotify_event *)&eventArr[curr_byte];
367                 curr_byte += EVENT_SIZE + event->len;
368                 if (event->wd == rpc_pipedir_wfd) {
369                         if (strncmp(event->name, "nfs", 3))
370                                 continue;
371                         if (event->mask & IN_CREATE) {
372                                 BL_LOG_WARNING("nfs pipe dir created\n");
373                                 bl_watch_dir(NFSPIPE_DIR, &nfs_pipedir_wfd);
374                                 bl_pipe_fd = open(BL_PIPE_FILE, O_RDWR);
375                         } else if (event->mask & IN_DELETE) {
376                                 BL_LOG_WARNING("nfs pipe dir deleted\n");
377                                 inotify_rm_watch(bl_watch_fd, nfs_pipedir_wfd);
378                                 close(bl_pipe_fd);
379                                 nfs_pipedir_wfd = -1;
380                                 bl_pipe_fd = -1;
381                         }
382                 } else if (event->wd == nfs_pipedir_wfd) {
383                         if (strncmp(event->name, "blocklayout", 11))
384                                 continue;
385                         if (event->mask & IN_CREATE) {
386                                 BL_LOG_WARNING("blocklayout pipe file created\n");
387                                 bl_pipe_fd = open(BL_PIPE_FILE, O_RDWR);
388                                 if (bl_pipe_fd < 0)
389                                         BL_LOG_ERR("open %s failed: %s\n",
390                                                 event->name, strerror(errno));
391                         } else if (event->mask & IN_DELETE) {
392                                 BL_LOG_WARNING("blocklayout pipe file deleted\n");
393                                 close(bl_pipe_fd);
394                                 bl_pipe_fd = -1;
395                         }
396                 }
397         }
398 }
399
400 static int bl_event_helper(void)
401 {
402         fd_set rset;
403         int ret = 0, maxfd;
404
405         for (;;) {
406                 FD_ZERO(&rset);
407                 FD_SET(bl_watch_fd, &rset);
408                 if (bl_pipe_fd > 0)
409                         FD_SET(bl_pipe_fd, &rset);
410                 maxfd = (bl_watch_fd>bl_pipe_fd)?bl_watch_fd:bl_pipe_fd;
411                 switch (select(maxfd + 1, &rset, NULL, NULL, NULL)) {
412                 case -1:
413                         if (errno == EINTR)
414                                 continue;
415                         else {
416                                 ret = -errno;
417                                 goto out;
418                         }
419                 case 0:
420                         goto out;
421                 default:
422                         if (FD_ISSET(bl_watch_fd, &rset))
423                                 bl_rpcpipe_cb();
424                         else if (bl_pipe_fd > 0 && FD_ISSET(bl_pipe_fd, &rset))
425                                 ret = bl_disk_inquiry_process(bl_pipe_fd);
426                         if (ret)
427                                 goto out;
428                 }
429         }
430  out:
431         return ret;
432 }
433
434 /* Daemon */
435 int main(int argc, char **argv)
436 {
437         int pidfd = -1, opt, dflag = 0, fg = 0, ret = 1;
438         struct stat statbuf;
439         char pidbuf[64];
440
441         while ((opt = getopt(argc, argv, "df")) != -1) {
442                 switch (opt) {
443                 case 'd':
444                         dflag = 1;
445                         break;
446                 case 'f':
447                         fg = 1;
448                         break;
449                 }
450         }
451
452         if (fg) {
453                 openlog("blkmapd", LOG_PERROR, 0);
454         } else {
455                 if (!stat(PID_FILE, &statbuf)) {
456                         fprintf(stderr, "Pid file %s already existed\n", PID_FILE);
457                         exit(1);
458                 }
459
460                 if (daemon(0, 0) != 0) {
461                         fprintf(stderr, "Daemonize failed\n");
462                         exit(1);
463                 }
464
465                 openlog("blkmapd", LOG_PID, 0);
466                 pidfd = open(PID_FILE, O_WRONLY | O_CREAT, 0644);
467                 if (pidfd < 0) {
468                         BL_LOG_ERR("Create pid file %s failed\n", PID_FILE);
469                         exit(1);
470                 }
471
472                 if (lockf(pidfd, F_TLOCK, 0) < 0) {
473                         BL_LOG_ERR("Lock pid file %s failed\n", PID_FILE);
474                         close(pidfd);
475                         exit(1);
476                 }
477                 ftruncate(pidfd, 0);
478                 sprintf(pidbuf, "%d\n", getpid());
479                 write(pidfd, pidbuf, strlen(pidbuf));
480         }
481
482         if (dflag) {
483                 bl_discover_devices();
484                 exit(0);
485         }
486
487         if ((bl_watch_fd = inotify_init()) < 0) {
488                 BL_LOG_ERR("init inotify failed %s\n", strerror(errno));
489                 exit(1);
490         }
491
492         /* open pipe file */
493         bl_watch_dir(RPCPIPE_DIR, &rpc_pipedir_wfd);
494         bl_watch_dir(NFSPIPE_DIR, &nfs_pipedir_wfd);
495
496         bl_pipe_fd = open(BL_PIPE_FILE, O_RDWR);
497         if (bl_pipe_fd < 0)
498                 BL_LOG_ERR("open pipe file %s failed: %s\n", BL_PIPE_FILE, strerror(errno));
499
500         while (1) {
501                 /* discover device when needed */
502                 bl_discover_devices();
503
504                 ret = bl_event_helper();
505                 if (ret < 0) {
506                         /* what should we do with process error? */
507                         BL_LOG_ERR("inquiry process return %d\n", ret);
508                 }
509         }
510
511         if (pidfd >= 0) {
512                 close(pidfd);
513                 unlink(PID_FILE);
514         }
515
516         exit(ret);
517 }