]> git.decadent.org.uk Git - nfs-utils.git/blobdiff - utils/blkmapd/device-discovery.c
blkmapd: Add complex block layout discovery and mapping daemon
[nfs-utils.git] / utils / blkmapd / device-discovery.c
diff --git a/utils/blkmapd/device-discovery.c b/utils/blkmapd/device-discovery.c
new file mode 100644 (file)
index 0000000..c21de3e
--- /dev/null
@@ -0,0 +1,453 @@
+/*
+ * device-discovery.c: main function, discovering device and processing
+ * pipe request from kernel.
+ *
+ * Copyright (c) 2010 EMC Corporation, Haiying Tang <Tang_Haiying@emc.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/mount.h>
+#include <sys/select.h>
+#include <linux/kdev_t.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_ioctl.h>
+#include <scsi/sg.h>
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <syslog.h>
+#include <dirent.h>
+#include <ctype.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <libgen.h>
+#include <errno.h>
+#include <libdevmapper.h>
+
+#include "device-discovery.h"
+
+/* rpc_pipefs file that main() opens read/write to serve kernel requests */
+#define BL_PIPE_FILE   "/var/lib/nfs/rpc_pipefs/nfs/blocklayout"
+/* Pid file created and locked by main() when it runs as a daemon */
+#define PID_FILE       "/var/run/blkmapd.pid"
+
+/*
+ * Head of the singly linked list of discovered disks.  It is emptied by
+ * bl_release_disk() and filled by bl_add_disk().
+ */
+struct bl_disk *visible_disk_list;
+
+/*
+ * Look up a path entry by name.
+ *
+ * Walks the singly linked list that starts at @paths and returns the first
+ * entry whose full_path compares equal to @filepath.  Returns NULL when no
+ * entry matches, which includes the case of an empty (NULL) list.
+ */
+struct bl_disk_path *bl_get_path(const char *filepath,
+                                struct bl_disk_path *paths)
+{
+       struct bl_disk_path *cur;
+
+       for (cur = paths; cur != NULL; cur = cur->next) {
+               if (strcmp(cur->full_path, filepath) == 0)
+                       return cur;
+       }
+       return NULL;
+}
+
+/*
+ * Prefix test used to spot partitions: returns 1 when valid_path's
+ * full_path is a leading substring of path's full_path (for example
+ * "/dev/sda" against "/dev/sda1"; identical strings match too), and 0
+ * otherwise.
+ */
+int bl_is_partition(struct bl_disk_path *valid_path, struct bl_disk_path *path)
+{
+       const char *base = valid_path->full_path;
+       size_t base_len = strlen(base);
+
+       return strncmp(base, path->full_path, base_len) == 0;
+}
+
+/*
+ * Decide whether a newly found path should replace disk->valid_path.
+ *
+ * For multipath devices a path state can be PASSIVE, ACTIVE or PSEUDO,
+ * ordered PSEUDO > ACTIVE > PASSIVE, and the path with the highest state
+ * is the one used to create the pseudo device.  If device-mapper multipath
+ * support is a must, a pseudo device should exist for each multipath
+ * device; if not, the active path is chosen for device creation.
+ * Partitions are treated as invalid paths.
+ *
+ * Returns 0 (keep the current valid path) only when the disk already has a
+ * valid path whose state is >= @state and bl_is_partition() reports @path
+ * as a partition of it.  Returns 1 (the caller should update the disk) in
+ * every other case.
+ */
+int bl_update_path(struct bl_disk_path *path, enum bl_path_state_e state,
+                  struct bl_disk *disk)
+{
+       struct bl_disk_path *best = disk->valid_path;
+
+       /* nothing recorded yet, or the new state outranks the recorded one */
+       if (best == NULL || best->state < state)
+               return 1;
+
+       /* same or lower rank: only a partition of the recorded path loses */
+       return bl_is_partition(best, path) ? 0 : 1;
+}
+
+/*
+ * Tear down the global visible_disk_list.
+ *
+ * For every disk this frees each path entry together with its full_path
+ * string, then the serial, then the disk itself.  The list head is NULL
+ * when the function returns.
+ */
+void bl_release_disk(void)
+{
+       struct bl_disk *disk;
+       struct bl_disk_path *path;
+
+       while ((disk = visible_disk_list) != NULL) {
+               /* pop and free the paths one at a time */
+               while ((path = disk->paths) != NULL) {
+                       disk->paths = path->next;
+                       free(path->full_path);
+                       free(path);
+               }
+               free(disk->serial);     /* free(NULL) is a no-op */
+               visible_disk_list = disk->next;
+               free(disk);
+       }
+}
+
+/*
+ * Probe one block device node and record it in visible_disk_list.
+ *
+ * @filepath: device node to open.
+ *
+ * The device is silently skipped when it cannot be opened or stat'ed, when
+ * its size is zero, when no serial can be read from it, or when its path
+ * state is not BL_PATH_STATE_ACTIVE.  Otherwise the path is attached to the
+ * disk entry that has the same serial, and that entry is created first if
+ * it does not exist.  A path that is already recorded is not added twice.
+ *
+ * The size is sb.st_size when that is non-zero, else the BLKGETSIZE result.
+ * NOTE(review): BLKGETSIZE reports 512-byte sectors, so the two sources use
+ * different units; that is kept as-is because users of disk->size are not
+ * visible here.
+ *
+ * Ownership: the serial returned by bldev_read_serial() is handed over to a
+ * newly created disk entry (bl_release_disk() frees it with a single
+ * free()).  In every other case it is freed here.
+ */
+void bl_add_disk(char *filepath)
+{
+       struct bl_disk *disk = NULL;
+       int fd;
+       struct stat sb;
+       off_t size = 0;
+       struct bl_serial *serial = NULL;
+       enum bl_path_state_e ap_state;
+       struct bl_disk_path *diskpath = NULL, *path = NULL;
+       dev_t dev;
+
+       fd = open(filepath, O_RDONLY | O_LARGEFILE);
+       if (fd < 0)
+               return;
+
+       if (fstat(fd, &sb)) {
+               close(fd);
+               return;
+       }
+
+       if (!sb.st_size) {
+               /* BLKGETSIZE stores an unsigned long, which need not have
+                * the same width as off_t, so read it into its own variable.
+                */
+               unsigned long nsect = 0;
+
+               if (ioctl(fd, BLKGETSIZE, &nsect) == 0)
+                       size = nsect;
+       } else {
+               size = sb.st_size;
+       }
+
+       if (!size) {
+               close(fd);
+               return;
+       }
+
+       dev = sb.st_rdev;
+       serial = bldev_read_serial(fd, filepath);
+       if (!serial) {
+               /* without a serial the disk cannot be matched below */
+               close(fd);
+               return;
+       }
+       if (dm_is_dm_major(major(dev)))
+               ap_state = BL_PATH_STATE_PSEUDO;
+       else
+               ap_state = bldev_read_ap_state(fd);
+       close(fd);
+
+       if (ap_state != BL_PATH_STATE_ACTIVE)
+               goto out;
+
+       for (disk = visible_disk_list; disk != NULL; disk = disk->next) {
+               /* Already scanned or a partition?
+                * XXX: if released each time, maybe not need to compare
+                */
+               if ((serial->len == disk->serial->len) &&
+                   !memcmp(serial->data, disk->serial->data, serial->len)) {
+                       diskpath = bl_get_path(filepath, disk->paths);
+                       break;
+               }
+       }
+
+       /* this exact path is already recorded for the disk */
+       if (disk && diskpath)
+               goto out;
+
+       /* add path */
+       path = malloc(sizeof(*path));
+       if (!path) {
+               BL_LOG_ERR("%s: Out of memory!\n", __func__);
+               goto out;
+       }
+       path->next = NULL;
+       path->state = ap_state;
+       path->full_path = strdup(filepath);
+       if (!path->full_path)
+               goto out;
+
+       if (!disk) {            /* add disk */
+               disk = malloc(sizeof(*disk));
+               if (!disk) {
+                       BL_LOG_ERR("%s: Out of memory!\n", __func__);
+                       goto out;
+               }
+               disk->next = visible_disk_list;
+               disk->dev = dev;
+               disk->size = size;
+               disk->serial = serial;
+               serial = NULL;          /* now owned by the disk entry */
+               disk->valid_path = path;
+               disk->paths = path;
+               visible_disk_list = disk;
+       } else {
+               path->next = disk->paths;
+               disk->paths = path;
+               /* check whether we need to update disk info */
+               if (bl_update_path(path, path->state, disk)) {
+                       disk->dev = dev;
+                       disk->size = size;
+                       disk->valid_path = path;
+               }
+       }
+       path = NULL;                    /* now owned by the disk's path list */
+
+ out:
+       /* whatever was not handed over to the list is released here */
+       if (path) {
+               free(path->full_path);
+               free(path);
+       }
+       free(serial);
+}
+
+/*
+ * Rebuild visible_disk_list from scratch.
+ *
+ * Drops the previous list, then reads /proc/partitions line by line.  For
+ * each line that parses, the device name is kept only if /sys/block/<name>
+ * exists, and /dev/<name> is then handed to bl_add_disk().
+ *
+ * Always returns 0, including when /proc/partitions cannot be opened.
+ */
+int bl_discover_devices(void)
+{
+       char line[PATH_MAX], name[PATH_MAX], devpath[PATH_MAX];
+       FILE *parts;
+
+       /* start from an empty list */
+       bl_release_disk();
+
+       parts = fopen("/proc/partitions", "r");
+       if (!parts)
+               return 0;
+
+       while (fgets(line, sizeof line, parts) != NULL) {
+               /* three integer columns are skipped, the fourth is the name */
+               if (sscanf(line, "%*d %*d %*d %31s", name) != 1)
+                       continue;
+
+               /* skip names that have no /sys/block entry */
+               snprintf(devpath, sizeof devpath, "/sys/block/%s", name);
+               if (access(devpath, F_OK) < 0)
+                       continue;
+
+               snprintf(devpath, sizeof devpath, "/dev/%s", name);
+               bl_add_disk(devpath);
+       }
+
+       fclose(parts);
+       return 0;
+}
+
+/*
+ * Process one kernel request from the pipe.
+ *
+ * Reads a bl_pipemsg_hdr followed by head.totallen payload bytes from @fd,
+ * dispatches on head.type, and writes a bl_dev_msg reply back to @fd.
+ *
+ * Returns 0 when a reply was written (the reply status says whether the
+ * request itself succeeded), -EIO when reading the request or writing the
+ * reply fails, and -ENOMEM when the payload buffer cannot be allocated.
+ */
+int bl_disk_inquiry_process(int fd)
+{
+       int ret = 0;
+       struct bl_pipemsg_hdr head;
+       char *buf = NULL;
+       uint32_t major = 0, minor = 0;
+       uint16_t buflen;
+       struct bl_dev_msg reply;
+
+       /* Zero the whole reply so that no uninitialized stack bytes are
+        * written to the pipe on the error and umount paths.
+        */
+       memset(&reply, 0, sizeof(reply));
+
+       /* read request */
+       if (atomicio(read, fd, &head, sizeof(head)) != sizeof(head)) {
+               /* Note that an error in this or the next read is pretty
+                * catastrophic, as there is no good way to resync into
+                * the pipe's stream.
+                */
+               BL_LOG_ERR("Read pipefs head error!\n");
+               ret = -EIO;
+               goto out;
+       }
+
+       buflen = head.totallen;
+       /* malloc(0) may legally return NULL; never mistake that for OOM */
+       buf = malloc(buflen ? buflen : 1);
+       if (!buf) {
+               BL_LOG_ERR("%s: Out of memory!\n", __func__);
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       if (atomicio(read, fd, buf, buflen) != buflen) {
+               BL_LOG_ERR("Read pipefs content error!\n");
+               ret = -EIO;
+               goto out;
+       }
+
+       reply.status = BL_DEVICE_REQUEST_PROC;
+
+       switch (head.type) {
+       case BL_DEVICE_MOUNT:
+               /*
+                * It shouldn't be necessary to discover devices here, since
+                * process_deviceinfo() will re-discover if it can't find
+                * the devices it needs.  But in the case of multipath
+                * devices (ones that appear more than once, for example an
+                * active and a standby LUN), this will re-order them in the
+                * correct priority.
+                */
+               bl_discover_devices();
+               if (!process_deviceinfo(buf, buflen, &major, &minor)) {
+                       reply.status = BL_DEVICE_REQUEST_ERR;
+                       break;
+               }
+               reply.major = major;
+               reply.minor = minor;
+               break;
+       case BL_DEVICE_UMOUNT:
+               /* the payload is read as a uint64_t, so it must be that big */
+               if (buflen < sizeof(uint64_t) ||
+                   !dm_device_remove_all((uint64_t *) buf))
+                       reply.status = BL_DEVICE_REQUEST_ERR;
+               break;
+       default:
+               reply.status = BL_DEVICE_REQUEST_ERR;
+               break;
+       }
+
+       /* write to pipefs */
+       if (atomicio((void *)write, fd, &reply, sizeof(reply))
+           != sizeof(reply)) {
+               BL_LOG_ERR("Write pipefs error!\n");
+               ret = -EIO;
+       }
+
+ out:
+       free(buf);
+       return ret;
+}
+
+/*
+ * Stop flag polled by bl_run_disk_inquiry_process(), which clears it on
+ * entry and returns 1 once it is non-zero.  Nothing in this file sets it
+ * to a non-zero value yet.
+ * TODO: set bl_process_stop to 1 in command
+ */
+unsigned int bl_process_stop;
+
+/*
+ * Serve kernel requests arriving on @fd until told to stop.
+ *
+ * Clears bl_process_stop, then blocks in select() with no timeout and calls
+ * bl_disk_inquiry_process() each time @fd becomes readable.  The result of
+ * that call is not acted upon: the loop keeps running regardless.
+ *
+ * Returns 1 when bl_process_stop is found set, 0 if select() reports no
+ * ready descriptor, and -errno when select() fails with anything other than
+ * EINTR (EINTR simply restarts the loop).
+ */
+int bl_run_disk_inquiry_process(int fd)
+{
+       fd_set readable;
+       int nready;
+
+       bl_process_stop = 0;
+
+       while (!bl_process_stop) {
+               FD_ZERO(&readable);
+               FD_SET(fd, &readable);
+
+               nready = select(fd + 1, &readable, NULL, NULL, NULL);
+               if (nready == -1) {
+                       if (errno == EINTR)
+                               continue;
+                       return -errno;
+               }
+               if (nready == 0)
+                       return 0;
+
+               /* per-request errors are deliberately not propagated */
+               if (FD_ISSET(fd, &readable))
+                       (void)bl_disk_inquiry_process(fd);
+       }
+       return 1;
+}
+
+/*
+ * Daemon entry point.
+ *
+ * Options:
+ *   -d  run device discovery once and exit with status 0
+ *   -f  stay in the foreground (syslog opened with LOG_PERROR, no pid file)
+ *
+ * Without -f the process refuses to start if PID_FILE already exists, then
+ * daemonizes and creates, locks and fills PID_FILE.  It then opens
+ * BL_PIPE_FILE and alternates forever between device discovery and the
+ * request loop.
+ *
+ * Every exit taken after the pid file has been locked goes through the
+ * "out" label, so the pid file is removed and does not block the next start.
+ */
+int main(int argc, char **argv)
+{
+       int fd, pidfd = -1, opt, dflag = 0, fg = 0, ret = 1;
+       struct stat statbuf;
+       char pidbuf[64];
+       int len;
+
+       while ((opt = getopt(argc, argv, "df")) != -1) {
+               switch (opt) {
+               case 'd':
+                       dflag = 1;
+                       break;
+               case 'f':
+                       fg = 1;
+                       break;
+               }
+       }
+
+       if (fg) {
+               openlog("blkmapd", LOG_PERROR, 0);
+       } else {
+               if (!stat(PID_FILE, &statbuf)) {
+                       fprintf(stderr, "Pid file %s already existed\n", PID_FILE);
+                       exit(1);
+               }
+
+               if (daemon(0, 0) != 0) {
+                       fprintf(stderr, "Daemonize failed\n");
+                       exit(1);
+               }
+
+               openlog("blkmapd", LOG_PID, 0);
+               pidfd = open(PID_FILE, O_WRONLY | O_CREAT, 0644);
+               if (pidfd < 0) {
+                       BL_LOG_ERR("Create pid file %s failed\n", PID_FILE);
+                       exit(1);
+               }
+
+               if (lockf(pidfd, F_TLOCK, 0) < 0) {
+                       BL_LOG_ERR("Lock pid file %s failed\n", PID_FILE);
+                       /* the file is likely held by another instance, so
+                        * it is closed but not unlinked
+                        */
+                       close(pidfd);
+                       exit(1);
+               }
+
+               /* a pid file without content is reported but is not fatal */
+               len = snprintf(pidbuf, sizeof(pidbuf), "%d\n", (int)getpid());
+               if (ftruncate(pidfd, 0) < 0 ||
+                   write(pidfd, pidbuf, (size_t)len) != len)
+                       BL_LOG_ERR("Write pid file %s failed\n", PID_FILE);
+       }
+
+       if (dflag) {
+               bl_discover_devices();
+               ret = 0;
+               goto out;
+       }
+
+       /* open pipe file */
+       fd = open(BL_PIPE_FILE, O_RDWR);
+       if (fd < 0) {
+               BL_LOG_ERR("open pipe file %s error\n", BL_PIPE_FILE);
+               ret = 1;
+               goto out;
+       }
+
+       while (1) {
+               /* discover device when needed */
+               bl_discover_devices();
+
+               ret = bl_run_disk_inquiry_process(fd);
+               if (ret < 0) {
+                       /* what should we do with process error? */
+                       BL_LOG_ERR("inquiry process return %d\n", ret);
+               }
+       }
+
+ out:
+       /* pidfd is only >= 0 when this process created and locked the file */
+       if (pidfd >= 0) {
+               close(pidfd);
+               unlink(PID_FILE);
+       }
+
+       exit(ret);
+}