git.decadent.org.uk Git - nfs-utils.git/blobdiff - utils/blkmapd/dm-device.c
blkmapd: Add complex block layout discovery and mapping daemon
[nfs-utils.git] / utils / blkmapd / dm-device.c
diff --git a/utils/blkmapd/dm-device.c b/utils/blkmapd/dm-device.c
new file mode 100644 (file)
index 0000000..0f4f148
--- /dev/null
@@ -0,0 +1,518 @@
+/*
+ * dm-device.c: create or remove device via device mapper API.
+ *
+ * Copyright (c) 2010 EMC Corporation, Haiying Tang <Tang_Haiying@emc.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <linux/kdev_t.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <libdevmapper.h>
+
+#include "device-discovery.h"
+
+#define DM_DEV_NAME_LEN                256 /* size of the buffers that hold device names and /dev/mapper paths */
+
+#ifndef DM_MAX_TYPE_NAME /* fallback used only when no included header already defines it */
+#define DM_MAX_TYPE_NAME       16 /* size of bl_dm_table.target_type */
+#endif
+
+#define DM_PARAMS_LEN          512     /* XXX: is this enough for target? */
+/*
+ * True for the volume types whose param.bv_dev is read as a device number
+ * by dm_device_create() (SIMPLE and PSEUDO).  The argument is parenthesized
+ * so that any expression can be passed; note it is still evaluated twice.
+ */
+#define TYPE_HAS_DEV(type)     (((type) == BLOCK_VOLUME_SIMPLE) || \
+                        ((type) == BLOCK_VOLUME_PSEUDO))
+
+struct bl_dm_table { /* one target line of a device-mapper table, kept in a singly linked list */
+       uint64_t offset; /* passed as the start argument of dm_task_add_target() */
+       uint64_t size; /* passed as the size argument of dm_task_add_target() */
+       char target_type[DM_MAX_TYPE_NAME]; /* target name; this file uses "linear" and "striped" */
+       char params[DM_PARAMS_LEN]; /* target parameter string built by dm_device_create() */
+       struct bl_dm_table *next; /* next list entry, NULL at the tail */
+};
+
+struct bl_dm_tree { /* list node pairing a device number with its dm_tree */
+       uint64_t dev; /* device number, used as the lookup key by find_bl_dm_tree() */
+       struct dm_tree *tree; /* tree obtained from dm_tree_create() for that device */
+       struct bl_dm_tree *next; /* next list node, NULL at the tail */
+};
+
+static const char dm_name[] = "pnfs_vol_%u"; /* printf-style template for generated device names */
+
+static unsigned int dev_count; /* next name index to try; incremented by dm_device_create() */
+
+/* Allocate one zero-filled table entry; returns NULL when out of memory. */
+static inline struct bl_dm_table *bl_dm_table_alloc(void)
+{
+       struct bl_dm_table *entry = calloc(1, sizeof(*entry));
+
+       return entry;
+}
+
+/* Release every entry of the list starting at bl_table_head (may be NULL). */
+static void bl_dm_table_free(struct bl_dm_table *bl_table_head)
+{
+       struct bl_dm_table *cur, *following;
+
+       for (cur = bl_table_head; cur != NULL; cur = following) {
+               /* Read the link before the entry is released */
+               following = cur->next;
+               free(cur);
+       }
+}
+
+/* Append table at the tail of the list whose head pointer is *bl_table_head. */
+static void add_to_bl_dm_table(struct bl_dm_table **bl_table_head,
+                       struct bl_dm_table *table)
+{
+       struct bl_dm_table **link = bl_table_head;
+
+       /* Advance to the first NULL link: the head itself or the last next */
+       while (*link != NULL)
+               link = &(*link)->next;
+       *link = table;
+}
+
+struct bl_dm_tree *bl_tree_head; /* head of the list of registered device trees; NULL when empty */
+
+/* Return the list node registered for dev, or NULL when there is none. */
+static struct bl_dm_tree *find_bl_dm_tree(uint64_t dev)
+{
+       struct bl_dm_tree *cur = bl_tree_head;
+
+       while (cur != NULL) {
+               if (cur->dev == dev)
+                       return cur;
+               cur = cur->next;
+       }
+       return NULL;
+}
+
+/*
+ * Unlink and free the first list node registered for dev, if any.
+ * Only the node itself is freed; its dm_tree is not touched here.
+ */
+static void del_from_bl_dm_tree(uint64_t dev)
+{
+       struct bl_dm_tree **link = &bl_tree_head;
+       struct bl_dm_tree *victim;
+
+       while ((victim = *link) != NULL) {
+               if (victim->dev == dev) {
+                       /* Works for the head as well as for inner nodes */
+                       *link = victim->next;
+                       free(victim);
+                       return;
+               }
+               link = &victim->next;
+       }
+}
+
+/* Append tree at the tail of the global list headed by bl_tree_head. */
+static void add_to_bl_dm_tree(struct bl_dm_tree *tree)
+{
+       struct bl_dm_tree **link = &bl_tree_head;
+
+       /* Advance to the first NULL link: the head itself or the last next */
+       while (*link != NULL)
+               link = &(*link)->next;
+       *link = tree;
+}
+
+/*
+ * Create the device-mapper device dev_name from the target list p.
+ * Every list entry is added as one target of a DM_DEVICE_CREATE task.
+ * Returns the device number of the new device, or 0 when creation failed.
+ */
+static uint64_t
+dm_device_create_mapped(const char *dev_name, struct bl_dm_table *p)
+{
+       struct dm_info info;
+       struct dm_task *task;
+       struct bl_dm_table *entry;
+       int ok;
+
+       task = dm_task_create(DM_DEVICE_CREATE);
+       if (task == NULL) {
+               BL_LOG_ERR("Create dm_task for %s failed\n", dev_name);
+               return 0;
+       }
+
+       ok = dm_task_set_name(task, dev_name);
+
+       /* Stop adding targets as soon as one step has failed */
+       for (entry = p; ok && entry != NULL; entry = entry->next)
+               ok = dm_task_add_target(task, entry->offset, entry->size,
+                                       entry->target_type, entry->params);
+
+       if (ok)
+               ok = dm_task_run(task) && dm_task_get_info(task, &info)
+                   && info.exists;
+
+       if (ok)
+               dm_task_update_nodes();
+
+       dm_task_destroy(task);
+
+       if (!ok) {
+               BL_LOG_ERR("Create device %s failed\n", dev_name);
+               return 0;
+       }
+       /* info is only read here, after dm_task_get_info() filled it in */
+       return MKDEV(info.major, info.minor);
+}
+
+/*
+ * Remove the device-mapper device called dev_name.
+ * Returns 1 when the removal task ran successfully, 0 otherwise.
+ */
+static int dm_device_remove_byname(const char *dev_name)
+{
+       struct dm_task *task;
+       int ok;
+
+       BL_LOG_INFO("%s: %s\n", __func__, dev_name);
+
+       task = dm_task_create(DM_DEVICE_REMOVE);
+       if (task == NULL)
+               return 0;
+
+       ok = dm_task_set_name(task, dev_name) != 0;
+       if (ok)
+               ok = dm_task_run(task) != 0;
+
+       /* Node update and task teardown happen whether or not it worked */
+       dm_task_update_nodes();
+       dm_task_destroy(task);
+
+       return ok;
+}
+
+/*
+ * Remove the device-mapper device whose device number is dev.
+ *
+ * The device name is looked up in the DM_DEVICE_LIST output and the device
+ * is then removed by name.
+ *
+ * Returns the result of dm_device_remove_byname() (non-zero on success),
+ * or 0 when the device list could not be read, the device is not listed,
+ * or its name could not be copied.
+ */
+int dm_device_remove(uint64_t dev)
+{
+       struct dm_task *dmt;
+       struct dm_names *dmnames;
+       char *name = NULL;
+       int ret = 0;
+
+       /* Look for dev_name via dev, if dev_name could be transferred here,
+          we could jump to DM_DEVICE_REMOVE directly */
+
+       dmt = dm_task_create(DM_DEVICE_LIST);
+       if (!dmt) {
+               BL_LOG_ERR("dm_task creation failed\n");
+               return 0;
+       }
+
+       if (!dm_task_run(dmt)) {
+               BL_LOG_ERR("dm_task_run failed\n");
+               goto out;
+       }
+
+       dmnames = dm_task_get_names(dmt);
+       if (!dmnames || !dmnames->dev) {
+               BL_LOG_ERR("dm_task_get_names failed\n");
+               goto out;
+       }
+
+       for (;;) {
+               if (dmnames->dev == dev) {
+                       name = strdup(dmnames->name);
+                       break;
+               }
+               /*
+                * next is a byte offset to the following entry and is 0 on
+                * the last one; stop there instead of spinning on it forever.
+                */
+               if (!dmnames->next)
+                       break;
+               dmnames = (struct dm_names *)((char *)dmnames + dmnames->next);
+       }
+
+       if (!name) {
+               BL_LOG_ERR("Could not find device\n");
+               goto out;
+       }
+
+       dm_task_update_nodes();
+
+ out:
+       /* name is a private copy, so the list task can go away first */
+       dm_task_destroy(dmt);
+
+       /* Start to remove device; ret stays 0 on every failure path above */
+       if (name) {
+               ret = dm_device_remove_byname(name);
+               free(name);
+       }
+
+       return ret;
+}
+
+/*
+ * Remove the generated devices whose name index runs from end - 2 down to
+ * start, highest index first.  Nothing is done when start is not below
+ * dev_count or when that range is empty.
+ */
+static void dm_devicelist_remove(unsigned int start, unsigned int end)
+{
+       char name[DM_DEV_NAME_LEN];
+       unsigned int idx;
+
+       if (start >= dev_count || end <= 1 || start >= end - 1)
+               return;
+
+       idx = end - 1;
+       while (idx > start) {
+               /* Step down first: the index used is one below the cursor */
+               idx--;
+               snprintf(name, sizeof(name), dm_name, idx);
+               dm_device_remove_byname(name);
+       }
+}
+
+/* Free the dm_tree registered for dev and drop its list node, if present. */
+static void bl_dm_remove_tree(uint64_t dev)
+{
+       struct bl_dm_tree *entry = find_bl_dm_tree(dev);
+
+       if (entry != NULL) {
+               dm_tree_free(entry->tree);
+               del_from_bl_dm_tree(dev);
+       }
+}
+
+/*
+ * Make sure a dm_tree is registered for dev.
+ * Returns 1 when one already exists or was created, 0 on failure.
+ */
+static int bl_dm_create_tree(uint64_t dev)
+{
+       struct dm_tree *tree;
+       struct bl_dm_tree *entry;
+
+       if (find_bl_dm_tree(dev) != NULL)
+               return 1;
+
+       tree = dm_tree_create();
+       if (tree == NULL)
+               return 0;
+
+       if (!dm_tree_add_dev(tree, MAJOR(dev), MINOR(dev)))
+               goto fail;
+
+       entry = malloc(sizeof(*entry));
+       if (entry == NULL)
+               goto fail;
+
+       entry->dev = dev;
+       entry->tree = tree;
+       entry->next = NULL;
+       add_to_bl_dm_tree(entry);
+
+       return 1;
+
+ fail:
+       /* The tree is not registered anywhere yet, so release it here */
+       dm_tree_free(tree);
+       return 0;
+}
+
+/*
+ * Remove a device together with the children recorded in its dm_tree.
+ *
+ * dev points at two consecutive 32-bit values, major first and minor
+ * second, which are combined with MKDEV() to form the lookup key.
+ *
+ * Returns the result of dm_tree_deactivate_children(), or 0 when no tree,
+ * tree node or uuid is found for the device.
+ */
+int dm_device_remove_all(uint64_t *dev)
+{
+       uint32_t parts[2];      /* [0] = major, [1] = minor */
+       uint64_t bl_dev;
+       struct bl_dm_tree *entry;
+       struct dm_tree_node *node;
+       const char *uuid;
+       int ret;
+
+       memcpy(parts, dev, sizeof(parts));
+       bl_dev = MKDEV(parts[0], parts[1]);
+
+       entry = find_bl_dm_tree(bl_dev);
+       if (entry == NULL)
+               return 0;
+
+       node = dm_tree_find_node(entry->tree, MAJOR(bl_dev), MINOR(bl_dev));
+       if (node == NULL)
+               return 0;
+
+       uuid = dm_tree_node_get_uuid(node);
+       if (uuid == NULL)
+               return 0;
+
+       /* The device itself goes first, then the children below it */
+       dm_device_remove(bl_dev);
+       ret = dm_tree_deactivate_children(node, uuid, strlen(uuid));
+       dm_task_update_nodes();
+       bl_dm_remove_tree(bl_dev);
+
+       return ret;
+}
+
+/* Return 1 when /dev/mapper/<dev_name> is reported present by access(), else 0. */
+static int dm_device_exists(char *dev_name)
+{
+       char path[DM_DEV_NAME_LEN];
+
+       snprintf(path, sizeof(path), "/dev/mapper/%s", dev_name);
+       if (access(path, F_OK) < 0)
+               return 0;
+       return 1;
+}
+
+/*
+ * Create the device-mapper devices described by an array of volumes.
+ *
+ * vols:     volume descriptions, processed in array order.  SIMPLE entries
+ *           are used as they are; SLICE, STRIPE and CONCAT entries get a new
+ *           "pnfs_vol_<n>" device and are then rewritten in place as PSEUDO
+ *           volumes carrying the new device number.  Every sub-volume an
+ *           entry refers to must already be SIMPLE or PSEUDO.
+ * num_vols: number of entries in vols.
+ *
+ * Returns the device number of the last volume processed, or 0 when
+ * num_vols is not positive or on any failure.  On success the returned
+ * device is also registered through bl_dm_create_tree().
+ *
+ * Devices created earlier in the same call are removed again only when an
+ * unsupported volume type is met or when creating a device fails.
+ *
+ * TODO: check the value for DM_DEV_NAME_LEN, DM_TYPE_LEN, DM_PARAMS_LEN
+ */
+uint64_t dm_device_create(struct bl_volume *vols, int num_vols)
+{
+       uint64_t size, stripe_unit, dev = 0;
+       unsigned int count = dev_count;
+       int volnum, i, n;
+       struct bl_volume *node;
+       char *tmp;
+       struct bl_dm_table *table = NULL;
+       struct bl_dm_table *bl_table_head = NULL;
+       size_t len;
+       char dev_name[DM_DEV_NAME_LEN];
+
+       /* Create pseudo device here */
+       for (volnum = 0; volnum < num_vols; volnum++) {
+               node = &vols[volnum];
+               switch (node->bv_type) {
+               case BLOCK_VOLUME_SIMPLE:
+                       /*
+                        * Do not need to create device here.  The table list
+                        * is always empty at this point, so there is nothing
+                        * to release before moving on to the next volume.
+                        */
+                       dev = node->param.bv_dev;
+                       continue;
+               case BLOCK_VOLUME_SLICE:
+                       if (!TYPE_HAS_DEV(node->bv_vols[0]->bv_type))
+                               goto fail;
+                       table = bl_dm_table_alloc();
+                       if (!table)
+                               goto fail;
+                       /* Linked first so the common cleanup releases it */
+                       add_to_bl_dm_table(&bl_table_head, table);
+                       table->offset = 0;
+                       table->size = node->bv_size;
+                       strcpy(table->target_type, "linear");
+                       dev = node->bv_vols[0]->param.bv_dev;
+                       if (!dm_format_dev(table->params, DM_PARAMS_LEN,
+                                          MAJOR(dev), MINOR(dev)))
+                               goto fail;
+                       len = strlen(table->params);
+                       n = snprintf(table->params + len, DM_PARAMS_LEN - len,
+                                    " %llu",
+                                    (long long unsigned) node->param.bv_offset);
+                       if (n < 0 || (size_t) n >= DM_PARAMS_LEN - len)
+                               goto fail;
+                       break;
+               case BLOCK_VOLUME_STRIPE:
+                       stripe_unit = node->param.bv_stripe_unit;
+                       /* A zero stripe unit would divide by zero below */
+                       if (!stripe_unit)
+                               goto fail;
+                       table = bl_dm_table_alloc();
+                       if (!table)
+                               goto fail;
+                       add_to_bl_dm_table(&bl_table_head, table);
+                       table->offset = 0;
+                       /* Truncate size to a stripe unit boundary */
+                       table->size =
+                           node->bv_size - (node->bv_size % stripe_unit);
+                       strcpy(table->target_type, "striped");
+                       n = snprintf(table->params, DM_PARAMS_LEN, "%d %llu ",
+                                    node->bv_vol_n,
+                                    (long long unsigned) stripe_unit);
+                       if (n < 0 || n >= DM_PARAMS_LEN)
+                               goto fail;
+                       /* Copy subdev major:minor to params */
+                       tmp = table->params + n;
+                       len = DM_PARAMS_LEN - n;
+                       for (i = 0; i < node->bv_vol_n; i++) {
+                               if (!TYPE_HAS_DEV(node->bv_vols[i]->bv_type))
+                                       goto fail;
+                               dev = node->bv_vols[i]->param.bv_dev;
+                               if (!dm_format_dev(tmp, (int) len, MAJOR(dev),
+                                                  MINOR(dev)))
+                                       goto fail;
+                               n = strlen(tmp);
+                               tmp += n;
+                               len -= n;
+                               /* Each sub-device is followed by offset 0 */
+                               n = snprintf(tmp, len, " %d ", 0);
+                               if (n < 0 || (size_t) n >= len)
+                                       goto fail;
+                               tmp += n;
+                               len -= n;
+                       }
+                       break;
+               case BLOCK_VOLUME_CONCAT:
+                       size = 0;
+                       for (i = 0; i < node->bv_vol_n; i++) {
+                               if (!TYPE_HAS_DEV(node->bv_vols[i]->bv_type))
+                                       goto fail;
+                               table = bl_dm_table_alloc();
+                               if (!table)
+                                       goto fail;
+                               add_to_bl_dm_table(&bl_table_head, table);
+                               /* Sub-volumes are laid out back to back */
+                               table->offset = size;
+                               table->size = node->bv_vols[i]->bv_size;
+                               strcpy(table->target_type, "linear");
+                               dev = node->bv_vols[i]->param.bv_dev;
+                               if (!dm_format_dev(table->params, DM_PARAMS_LEN,
+                                                  MAJOR(dev), MINOR(dev)))
+                                       goto fail;
+                               len = strlen(table->params);
+                               n = snprintf(table->params + len,
+                                            DM_PARAMS_LEN - len, " %d", 0);
+                               if (n < 0 || (size_t) n >= DM_PARAMS_LEN - len)
+                                       goto fail;
+                               size += table->size;
+                       }
+                       break;
+               default:
+                       goto fail_remove;
+               }               /* end of switch */
+
+               /* Create dev_name here. Name of device is pnfs_vol_XXX */
+               do {
+                       snprintf(dev_name, sizeof(dev_name), dm_name,
+                                dev_count++);
+               } while (dm_device_exists(dev_name));
+
+               dev = dm_device_create_mapped(dev_name, bl_table_head);
+               BL_LOG_INFO("%s: %d %s %d:%d\n", __func__, volnum, dev_name,
+                           (int) MAJOR(dev), (int) MINOR(dev));
+               if (!dev)
+                       goto fail_remove;
+               node->param.bv_dev = dev;
+               /* TODO: extend use with PSEUDO later */
+               node->bv_type = BLOCK_VOLUME_PSEUDO;
+
+               bl_dm_table_free(bl_table_head);
+               bl_table_head = NULL;
+       }
+       /* Every volume was processed */
+       goto out;
+
+ fail_remove:
+       /* Delete previous temporary devices */
+       dm_devicelist_remove(count, dev_count);
+ fail:
+       /* Never hand a stale sub-device number back as the result */
+       dev = 0;
+ out:
+       bl_dm_table_free(bl_table_head);
+       if (dev)
+               bl_dm_create_tree(dev);
+       return dev;
+}