2 * device-discovery.c: main function, discovering device and processing
3 * pipe request from kernel.
5 * Copyright (c) 2010 EMC Corporation, Haiying Tang <Tang_Haiying@emc.com>
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 #include <sys/types.h>
31 #include <sys/ioctl.h>
32 #include <sys/mount.h>
33 #include <sys/select.h>
34 #include <sys/inotify.h>
35 #include <linux/kdev_t.h>
36 #include <scsi/scsi.h>
37 #include <scsi/scsi_ioctl.h>
51 #include <libdevmapper.h>
53 #include "device-discovery.h"
/* Size of one inotify event header, not counting its trailing name field. */
55 #define EVENT_SIZE (sizeof(struct inotify_event))
/* Byte size of the buffer used to read inotify events (1024 headers). */
56 #define EVENT_BUFSIZE (1024 * EVENT_SIZE)
/* Path of the blocklayout pipe file; opened with O_RDWR in this file. */
58 #define BL_PIPE_FILE "/var/lib/nfs/rpc_pipefs/nfs/blocklayout"
/* Directories that are watched through inotify for create and delete. */
59 #define NFSPIPE_DIR "/var/lib/nfs/rpc_pipefs/nfs"
60 #define RPCPIPE_DIR "/var/lib/nfs/rpc_pipefs"
/* Pid file that main() checks for, creates, locks and writes its pid to. */
61 #define PID_FILE "/var/run/blkmapd.pid"
/* Head of the list of discovered disks; entries are chained through next. */
63 struct bl_disk *visible_disk_list;
/*
 * bl_watch_fd:      inotify descriptor returned by inotify_init().
 * bl_pipe_fd:       descriptor obtained by opening BL_PIPE_FILE.
 * nfs_pipedir_wfd:  inotify watch descriptor for NFSPIPE_DIR.
 * rpc_pipedir_wfd:  inotify watch descriptor for RPCPIPE_DIR.
 */
64 int bl_watch_fd, bl_pipe_fd, nfs_pipedir_wfd, rpc_pipedir_wfd;
/*
 * bl_get_path: look for the entry in the path list starting at paths whose
 * full_path string is equal to filepath.
 *
 * NOTE(review): the loop header, the list advance and the return statement
 * are not part of this listing, so the value returned on a match and on a
 * miss cannot be confirmed from here. The caller in bl_add_disk() tests the
 * result for non-NULL.
 */
67 struct bl_disk_path *bl_get_path(const char *filepath,
68 struct bl_disk_path *paths)
70 struct bl_disk_path *tmp = paths;
/* strcmp() yields 0 on an exact match, so this test is true on equality. */
73 if (!strcmp(tmp->full_path, filepath))
/*
 * bl_is_partition: compare the first strlen(valid_path->full_path) bytes of
 * both path names, i.e. test whether the name of valid_path is a leading
 * substring of the name of path.
 *
 * NOTE(review): this is a plain prefix comparison, so any longer name that
 * merely starts with the same characters also satisfies it.
 * NOTE(review): the return statements are not part of this listing; the
 * caller in bl_update_path() uses the result as a boolean.
 */
80 /* Check whether valid_path is a substring(partition) of path */
81 int bl_is_partition(struct bl_disk_path *valid_path, struct bl_disk_path *path)
/* strncmp() yields 0 when the compared prefix is identical. */
83 if (!strncmp(valid_path->full_path, path->full_path,
84 strlen(valid_path->full_path)))
91 * For multipath devices, devices state could be PASSIVE/ACTIVE/PSEUDO,
92 * where PSEUDO > ACTIVE > PASSIVE. Device with highest state is used to
93 * create pseudo device. So if state is higher, the device path needs to be updated.
95 * If device-mapper multipath support is a must, pseudo devices should
96 * exist for each multipath device. If not, active device path will be
97 * chosen for device creation.
98 * Treat partition as invalid path.
/*
 * bl_update_path: decide whether path, whose state is state, should take
 * over from the current valid_path of disk.
 *
 * Visible logic: when the state of the current valid path is greater than
 * or equal to state, the candidate is additionally tested with
 * bl_is_partition() against the current valid path.
 *
 * NOTE(review): the return statements are not part of this listing. The
 * caller in bl_add_disk() assigns path to disk->valid_path when the result
 * is non-zero; confirm the exact return values against the full source.
 */
100 int bl_update_path(struct bl_disk_path *path, enum bl_path_state_e state,
101 struct bl_disk *disk)
103 struct bl_disk_path *valid_path = disk->valid_path;
/* The candidate does not have a strictly higher state than the current one. */
106 if (valid_path->state >= state) {
107 if (bl_is_partition(valid_path, path))
/*
 * bl_release_disk: tear down the global visible_disk_list.
 *
 * The outer loop runs until visible_disk_list is NULL. For each disk the
 * visible lines unlink a path from disk->paths, free its full_path string
 * and finally move the list head on to disk->next.
 *
 * NOTE(review): the inner loop header and the remaining free() calls (for
 * the path node, the serial and the disk itself, if any) are not part of
 * this listing; confirm against the full source.
 */
114 void bl_release_disk(void)
116 struct bl_disk *disk;
117 struct bl_disk_path *path = NULL;
119 while (visible_disk_list) {
/* Always work on the current head of the list. */
120 disk = visible_disk_list;
/* Unlink path from the disk before its storage is released. */
123 disk->paths = path->next;
124 free(path->full_path);
/* Drop the disk from the global list. */
130 visible_disk_list = disk->next;
/*
 * bl_add_disk: probe the block device named by filepath and record it in
 * visible_disk_list.
 *
 * Visible steps:
 *  - open the device read-only, fstat() it and query BLKGETSIZE;
 *  - read its serial with bldev_read_serial() and determine its path state
 *    (BL_PATH_STATE_PSEUDO for device-mapper majors, bldev_read_ap_state()
 *    otherwise);
 *  - search visible_disk_list for a disk with the same serial (same length
 *    and same bytes) and, if found, look the path up with bl_get_path();
 *  - allocate a bl_disk_path holding a strdup() copy of filepath;
 *  - when no disk matched, allocate a bl_disk, attach the serial and the
 *    new path as valid_path and push it on the head of visible_disk_list;
 *  - link the new path in front of disk->paths and let bl_update_path()
 *    decide whether it becomes the valid_path of the disk.
 *
 * NOTE(review): many lines (error checks, returns, else branches, cleanup
 * labels) are not part of this listing, so the error handling and the exact
 * branch structure cannot be confirmed from here. In particular no NULL
 * check of serial is visible before it is dereferenced in the search loop.
 */
135 void bl_add_disk(char *filepath)
137 struct bl_disk *disk = NULL;
141 struct bl_serial *serial = NULL;
142 enum bl_path_state_e ap_state;
143 struct bl_disk_path *diskpath = NULL, *path = NULL;
146 fd = open(filepath, O_RDONLY | O_LARGEFILE);
150 if (fstat(fd, &sb)) {
/* The return value of this ioctl() call is discarded. */
156 ioctl(fd, BLKGETSIZE, &size);
166 serial = bldev_read_serial(fd, filepath);
/* Device-mapper devices are classified as pseudo devices. */
167 if (dm_is_dm_major(major(dev)))
168 ap_state = BL_PATH_STATE_PSEUDO;
170 ap_state = bldev_read_ap_state(fd);
173 if (ap_state != BL_PATH_STATE_ACTIVE)
/* Search the known disks for one carrying the same serial. */
176 for (disk = visible_disk_list; disk != NULL; disk = disk->next) {
177 /* Already scanned or a partition?
178 * XXX: if released each time, maybe not need to compare
180 if ((serial->len == disk->serial->len) &&
181 !memcmp(serial->data, disk->serial->data, serial->len)) {
182 diskpath = bl_get_path(filepath, disk->paths);
187 if (disk && diskpath)
191 path = malloc(sizeof(struct bl_disk_path));
193 BL_LOG_ERR("%s: Out of memory!\n", __func__);
197 path->state = ap_state;
198 path->full_path = strdup(filepath);
199 if (!path->full_path)
202 if (!disk) { /* add disk */
203 disk = malloc(sizeof(struct bl_disk));
205 BL_LOG_ERR("%s: Out of memory!\n", __func__);
208 disk->next = visible_disk_list;
211 disk->serial = serial;
212 disk->valid_path = path;
214 visible_disk_list = disk;
216 path->next = disk->paths;
218 /* check whether we need to update disk info */
219 if (bl_update_path(path, path->state, disk)) {
222 disk->valid_path = path;
/*
 * NOTE(review): presumably part of a cleanup path reached on failure; the
 * label and the surrounding statements are not part of this listing.
 */
230 free(path->full_path);
/*
 * bl_discover_devices: rebuild the list of visible disks from the kernel
 * partition table.
 *
 * Visible steps: open /proc/partitions, read it line by line with fgets(),
 * extract the device name from each line, test for a matching entry under
 * /sys/block with access(), then hand the /dev path of the device to
 * bl_add_disk().
 *
 * NOTE(review): the loop header, the statements that follow each test, the
 * call that releases the previous list and the return statements are not
 * part of this listing; the return value cannot be confirmed from here.
 */
236 int bl_discover_devices(void)
240 char buf[PATH_MAX], devname[PATH_MAX], fulldevname[PATH_MAX];
242 /* release previous list */
245 /* scan all block devices */
246 f = fopen("/proc/partitions", "r");
251 if (fgets(buf, sizeof buf, f) == NULL)
/*
 * Three leading integer fields are parsed and discarded; the name is
 * limited to 31 characters, well inside the PATH_MAX sized devname buffer.
 */
253 n = sscanf(buf, "%*d %*d %*d %31s", devname);
256 snprintf(fulldevname, sizeof fulldevname, "/sys/block/%s",
/* access() returns a negative value when the sysfs entry does not exist. */
258 if (access(fulldevname, F_OK) < 0)
/* fulldevname is reused: it now holds the device node path. */
260 snprintf(fulldevname, sizeof fulldevname, "/dev/%s", devname);
261 bl_add_disk(fulldevname);
/*
 * bl_disk_inquiry_process: handle one request read from the pipe fd.
 *
 * Visible steps:
 *  - read a struct bl_pipemsg_hdr with atomicio();
 *  - allocate head.totallen bytes and read that many payload bytes;
 *  - start with reply.status set to BL_DEVICE_REQUEST_PROC;
 *  - BL_DEVICE_MOUNT: run bl_discover_devices(), then process_deviceinfo()
 *    on the payload; a zero result sets BL_DEVICE_REQUEST_ERR;
 *  - BL_DEVICE_UMOUNT: run dm_device_remove_all() on the payload; a zero
 *    result sets BL_DEVICE_REQUEST_ERR;
 *  - write the reply back to fd with atomicio().
 *
 * NOTE(review): head.totallen is used as the allocation and read size with
 * no validation visible in this listing.
 * NOTE(review): the switch header, a presumed default label in front of the
 * last status assignment, the use of major and minor in the reply and all
 * return statements are not part of this listing. The return convention is
 * given by the original comment that follows.
 */
269 /* process kernel request
270 * return 0: request processed, and no more request waiting;
271 * return 1: request processed, and more requests waiting;
274 static int bl_disk_inquiry_process(int fd)
277 struct bl_pipemsg_hdr head;
279 uint32_t major, minor;
281 struct bl_dev_msg reply;
284 if (atomicio(read, fd, &head, sizeof(head)) != sizeof(head)) {
285 /* Note that an error in this or the next read is pretty
286 * catastrophic, as there is no good way to resync into
289 BL_LOG_ERR("Read pipefs head error!\n");
294 buflen = head.totallen;
295 buf = malloc(buflen);
297 BL_LOG_ERR("%s: Out of memory!\n", __func__);
302 if (atomicio(read, fd, buf, buflen) != buflen) {
303 BL_LOG_ERR("Read pipefs content error!\n");
308 reply.status = BL_DEVICE_REQUEST_PROC;
311 case BL_DEVICE_MOUNT:
313 * It shouldn't be necessary to discover devices here, since
314 * process_deviceinfo() will re-discover if it can't find
315 * the devices it needs. But in the case of multipath
316 * devices (ones that appear more than once, for example an
317 * active and a standby LUN), this will re-order them in the
320 bl_discover_devices();
321 if (!process_deviceinfo(buf, buflen, &major, &minor)) {
322 reply.status = BL_DEVICE_REQUEST_ERR;
328 case BL_DEVICE_UMOUNT:
329 if (!dm_device_remove_all((uint64_t *) buf))
330 reply.status = BL_DEVICE_REQUEST_ERR;
333 reply.status = BL_DEVICE_REQUEST_ERR;
337 /* write to pipefs */
/*
 * NOTE(review): the right-hand side of this comparison is on a line that is
 * not part of this listing; presumably the byte count is checked against
 * sizeof(reply) as in the reads above.
 */
338 if (atomicio((void *)write, fd, &reply, sizeof(reply))
340 BL_LOG_ERR("Write pipefs error!\n");
/*
 * bl_watch_dir: register dir on the global inotify descriptor bl_watch_fd
 * for IN_CREATE and IN_DELETE events and store the resulting watch
 * descriptor in *wd.
 *
 * A failure is only logged, together with the errno text.
 * NOTE(review): the condition guarding the log call is not part of this
 * listing; presumably it tests *wd for a negative value.
 */
350 static void bl_watch_dir(const char* dir, int *wd)
352 *wd = inotify_add_watch(bl_watch_fd, dir, IN_CREATE|IN_DELETE);
354 BL_LOG_ERR("failed to watch %s: %s\n", dir, strerror(errno));
/*
 * bl_rpcpipe_cb: read pending inotify events from bl_watch_fd and react to
 * creation or deletion of the nfs pipe directory and of the blocklayout
 * pipe file.
 *
 * A single read() fills eventArr; the loop then walks the buffer record by
 * record, each record taking EVENT_SIZE + event->len bytes.
 *
 * NOTE(review): several lines (braces, the statements that follow the name
 * tests, descriptor close calls and some conditions) are not part of this
 * listing; the comments below describe only what the visible lines do.
 */
357 static void bl_rpcpipe_cb(void)
359 int rc, curr_byte = 0;
360 char eventArr[EVENT_BUFSIZE];
361 struct inotify_event *event;
363 rc = read(bl_watch_fd, &eventArr, EVENT_BUFSIZE);
365 BL_LOG_ERR("read event fail: %s", strerror(errno));
/* rc is the number of bytes read; stop once every record is consumed. */
367 while (rc > curr_byte) {
368 event = (struct inotify_event *)&eventArr[curr_byte];
/* Step over the fixed header plus the name field of this record. */
369 curr_byte += EVENT_SIZE + event->len;
/* Event delivered by the watch on RPCPIPE_DIR. */
370 if (event->wd == rpc_pipedir_wfd) {
/* Only the first 3 characters are compared, so this is a prefix test. */
371 if (strncmp(event->name, "nfs", 3))
373 if (event->mask & IN_CREATE) {
374 BL_LOG_WARNING("nfs pipe dir created\n");
/* Start watching the new directory and try to open the pipe file. */
375 bl_watch_dir(NFSPIPE_DIR, &nfs_pipedir_wfd);
376 bl_pipe_fd = open(BL_PIPE_FILE, O_RDWR);
377 } else if (event->mask & IN_DELETE) {
378 BL_LOG_WARNING("nfs pipe dir deleted\n");
/* Drop the watch on the removed directory and mark it as unset. */
379 inotify_rm_watch(bl_watch_fd, nfs_pipedir_wfd);
381 nfs_pipedir_wfd = -1;
/* Event delivered by the watch on NFSPIPE_DIR. */
384 } else if (event->wd == nfs_pipedir_wfd) {
/* Only the first 11 characters are compared, so this is a prefix test. */
385 if (strncmp(event->name, "blocklayout", 11))
387 if (event->mask & IN_CREATE) {
388 BL_LOG_WARNING("blocklayout pipe file created\n");
389 bl_pipe_fd = open(BL_PIPE_FILE, O_RDWR);
/* The message prints the event name rather than the full pipe path. */
391 BL_LOG_ERR("open %s failed: %s\n",
392 event->name, strerror(errno));
393 } else if (event->mask & IN_DELETE) {
394 BL_LOG_WARNING("blocklayout pipe file deleted\n");
/*
 * bl_event_helper: wait with select() for activity on the inotify
 * descriptor and on the blocklayout pipe, then dispatch.
 *
 * A readable pipe descriptor is handed to bl_disk_inquiry_process(), whose
 * result is stored in ret. The inotify descriptor is tested first, so the
 * pipe is only looked at when the inotify descriptor is not readable.
 *
 * NOTE(review): the loop structure, the case labels of the switch, the
 * statement executed for a readable inotify descriptor and the return
 * statements are not part of this listing.
 */
402 static int bl_event_helper(void)
409 FD_SET(bl_watch_fd, &rset);
411 FD_SET(bl_pipe_fd, &rset);
/* select() needs the highest descriptor number plus one. */
412 maxfd = (bl_watch_fd>bl_pipe_fd)?bl_watch_fd:bl_pipe_fd;
/* The timeout argument is NULL, so the call waits without a time limit. */
413 switch (select(maxfd + 1, &rset, NULL, NULL, NULL)) {
424 if (FD_ISSET(bl_watch_fd, &rset))
/* NOTE(review): descriptor 0 is treated as not usable by this test. */
426 else if (bl_pipe_fd > 0 && FD_ISSET(bl_pipe_fd, &rset))
427 ret = bl_disk_inquiry_process(bl_pipe_fd);
/*
 * sig_die: signal handler installed by main() for SIGINT and SIGTERM.
 *
 * The visible line logs the number of the received signal.
 * NOTE(review): the rest of the handler (any cleanup and the way the
 * process terminates) is not part of this listing.
 */
436 void sig_die(int signal)
442 BL_LOG_ERR("exit on signal(%d)\n", signal);
/*
 * main: entry point of blkmapd.
 *
 * Visible steps:
 *  - parse the options d and f with getopt();
 *  - open the log, either with LOG_PERROR or, after daemon(0, 0), with
 *    LOG_PID;
 *  - refuse to continue when PID_FILE can already be stat()ed; otherwise
 *    create it, lock it with lockf(F_TLOCK) and write the pid into it;
 *  - install sig_die for SIGINT and SIGTERM and ignore SIGHUP;
 *  - run an initial bl_discover_devices();
 *  - create the inotify descriptor, watch RPCPIPE_DIR and NFSPIPE_DIR and
 *    open BL_PIPE_FILE;
 *  - repeatedly run bl_discover_devices() and bl_event_helper(), logging
 *    the result of the latter.
 *
 * NOTE(review): the option switch, the conditions that select between the
 * foreground and the daemon path, the error exits, the loop header and the
 * end of the function are not part of this listing.
 */
447 int main(int argc, char **argv)
449 int opt, dflag = 0, fg = 0, ret = 1;
453 while ((opt = getopt(argc, argv, "df")) != -1) {
/* LOG_PERROR additionally sends log messages to stderr. */
465 openlog("blkmapd", LOG_PERROR, 0);
/* stat() returns 0 when the pid file exists. */
467 if (!stat(PID_FILE, &statbuf)) {
468 fprintf(stderr, "Pid file %s already existed\n", PID_FILE);
472 if (daemon(0, 0) != 0) {
473 fprintf(stderr, "Daemonize failed\n");
477 openlog("blkmapd", LOG_PID, 0);
478 pidfd = open(PID_FILE, O_WRONLY | O_CREAT, 0644);
480 BL_LOG_ERR("Create pid file %s failed\n", PID_FILE);
/* F_TLOCK fails at once instead of blocking when the file is locked. */
484 if (lockf(pidfd, F_TLOCK, 0) < 0) {
485 BL_LOG_ERR("Lock pid file %s failed\n", PID_FILE);
/* NOTE(review): unbounded sprintf() and an unchecked write() result. */
490 sprintf(pidbuf, "%d\n", getpid());
491 write(pidfd, pidbuf, strlen(pidbuf));
494 signal(SIGINT, sig_die);
495 signal(SIGTERM, sig_die);
496 signal(SIGHUP, SIG_IGN);
499 bl_discover_devices();
503 if ((bl_watch_fd = inotify_init()) < 0) {
504 BL_LOG_ERR("init inotify failed %s\n", strerror(errno));
/* Watch both directories so that a later appearance of the pipe is seen. */
509 bl_watch_dir(RPCPIPE_DIR, &rpc_pipedir_wfd);
510 bl_watch_dir(NFSPIPE_DIR, &nfs_pipedir_wfd);
512 bl_pipe_fd = open(BL_PIPE_FILE, O_RDWR);
514 BL_LOG_ERR("open pipe file %s failed: %s\n", BL_PIPE_FILE, strerror(errno));
517 /* discover device when needed */
518 bl_discover_devices();
520 ret = bl_event_helper();
522 /* what should we do with process error? */
523 BL_LOG_ERR("inquiry process return %d\n", ret);