| 1 | /* tapdisk.c |
|---|
| 2 | * |
|---|
| 3 | * separate disk process, spawned by blktapctrl. Inherits code from driver |
|---|
| 4 | * plugins |
|---|
| 5 | * |
|---|
| 6 | * Copyright (c) 2005 Julian Chesterfield and Andrew Warfield. |
|---|
| 7 | * |
|---|
| 8 | */ |
|---|
| 9 | |
|---|
| 10 | #define MSG_SIZE 4096 |
|---|
| 11 | #define TAPDISK |
|---|
| 12 | |
|---|
| 13 | #include <stdio.h> |
|---|
| 14 | #include <stdlib.h> |
|---|
| 15 | #include <sys/mman.h> |
|---|
| 16 | #include <fcntl.h> |
|---|
| 17 | #include <string.h> |
|---|
| 18 | #include <signal.h> |
|---|
| 19 | #include <sys/stat.h> |
|---|
| 20 | #include <sys/types.h> |
|---|
| 21 | #include <sys/poll.h> |
|---|
| 22 | #include <unistd.h> |
|---|
| 23 | #include <errno.h> |
|---|
| 24 | #include <pthread.h> |
|---|
| 25 | #include <time.h> |
|---|
| 26 | #include <err.h> |
|---|
| 27 | #include <poll.h> |
|---|
| 28 | #include <sys/statvfs.h> |
|---|
| 29 | #include <sys/ioctl.h> |
|---|
| 30 | #include <linux/fs.h> |
|---|
| 31 | #include "blktaplib.h" |
|---|
| 32 | #include "tapdisk.h" |
|---|
| 33 | |
|---|
#if 1
/*
 * ASSERT(p): if the expression is false, log the expression text together
 * with the source line and file through DPRINTF, then terminate the process
 * with abort().
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement (safe inside an unbraced if/else).
 */
#define ASSERT(_p)							\
	do {								\
		if ( !(_p) ) {						\
			DPRINTF("Assertion '%s' failed, line %d, file %s", \
				#_p , __LINE__, __FILE__);		\
			abort();					\
		}							\
	} while (0)
#else
#define ASSERT(_p) ((void)0)
#endif
|---|
| 41 | |
|---|
| 42 | #define INPUT 0 |
|---|
| 43 | #define OUTPUT 1 |
|---|
| 44 | |
|---|
| 45 | static int maxfds, fds[2], run = 1; |
|---|
| 46 | |
|---|
| 47 | static pid_t process; |
|---|
| 48 | int connected_disks = 0; |
|---|
| 49 | fd_list_entry_t *fd_start = NULL; |
|---|
| 50 | |
|---|
| 51 | int do_cow_read(struct disk_driver *dd, blkif_request_t *req, |
|---|
| 52 | int sidx, uint64_t sector, int nr_secs); |
|---|
| 53 | |
|---|
| 54 | #define td_for_each_disk(tds, drv) \ |
|---|
| 55 | for (drv = tds->disks; drv != NULL; drv = drv->next) |
|---|
| 56 | |
|---|
/*
 * usage - write the version banner and the invocation synopsis to stderr,
 * then terminate the process with exit status -1.
 */
void usage(void)
{
	fputs("blktap-utils: v1.0.0\n", stderr);
	fputs("usage: tapdisk <READ fifo> <WRITE fifo>\n", stderr);
	exit(-1);
}
|---|
| 63 | |
|---|
/*
 * daemonize - detach from the launching process.
 *
 * Returns immediately when the parent pid is already 1. Otherwise forks:
 * the parent exits with status 0 and the child returns to the caller.
 * If fork() fails the error is reported on stderr and the process exits
 * with status -1 (previously a failed fork was mistaken for the parent
 * branch and the process exited silently with status 0).
 */
void daemonize(void)
{
	pid_t child;

	if (getppid() == 1)
		return; /* already a daemon */

	child = fork();
	if (child < 0) {
		fprintf(stderr, "tapdisk: fork failed: %s\n", strerror(errno));
		exit(-1);
	}
	if (child != 0)
		exit(0); /* parent is done; the child carries on */

#if 0
	{
		int i;

		/*Set new program session ID and close all descriptors*/
		setsid();
		for (i = getdtablesize(); i >= 0; --i) close(i);

		/*Send all I/O to /dev/null */
		i = open("/dev/null",O_RDWR);
		dup(i);
		dup(i);
	}
#endif
	return;
}
|---|
| 83 | |
|---|
| 84 | static void free_driver(struct disk_driver *d) |
|---|
| 85 | { |
|---|
| 86 | if (d->name) |
|---|
| 87 | free(d->name); |
|---|
| 88 | if (d->private) |
|---|
| 89 | free(d->private); |
|---|
| 90 | free(d); |
|---|
| 91 | } |
|---|
| 92 | |
|---|
| 93 | static void unmap_disk(struct td_state *s) |
|---|
| 94 | { |
|---|
| 95 | tapdev_info_t *info = s->ring_info; |
|---|
| 96 | struct disk_driver *dd, *tmp; |
|---|
| 97 | fd_list_entry_t *entry; |
|---|
| 98 | |
|---|
| 99 | dd = s->disks; |
|---|
| 100 | while (dd) { |
|---|
| 101 | tmp = dd->next; |
|---|
| 102 | dd->drv->td_close(dd); |
|---|
| 103 | free_driver(dd); |
|---|
| 104 | dd = tmp; |
|---|
| 105 | } |
|---|
| 106 | |
|---|
| 107 | if (info != NULL && info->mem > 0) |
|---|
| 108 | munmap(info->mem, getpagesize() * BLKTAP_MMAP_REGION_SIZE); |
|---|
| 109 | |
|---|
| 110 | entry = s->fd_entry; |
|---|
| 111 | *entry->pprev = entry->next; |
|---|
| 112 | if (entry->next) |
|---|
| 113 | entry->next->pprev = entry->pprev; |
|---|
| 114 | |
|---|
| 115 | close(info->fd); |
|---|
| 116 | |
|---|
| 117 | free(s->fd_entry); |
|---|
| 118 | free(s->blkif); |
|---|
| 119 | free(s->ring_info); |
|---|
| 120 | free(s); |
|---|
| 121 | |
|---|
| 122 | return; |
|---|
| 123 | } |
|---|
| 124 | |
|---|
| 125 | void sig_handler(int sig) |
|---|
| 126 | { |
|---|
| 127 | /*Received signal to close. If no disks are active, we close app.*/ |
|---|
| 128 | |
|---|
| 129 | if (connected_disks < 1) run = 0; |
|---|
| 130 | } |
|---|
| 131 | |
|---|
| 132 | static inline int LOCAL_FD_SET(fd_set *readfds) |
|---|
| 133 | { |
|---|
| 134 | fd_list_entry_t *ptr; |
|---|
| 135 | struct disk_driver *dd; |
|---|
| 136 | |
|---|
| 137 | ptr = fd_start; |
|---|
| 138 | while (ptr != NULL) { |
|---|
| 139 | if (ptr->tap_fd) { |
|---|
| 140 | FD_SET(ptr->tap_fd, readfds); |
|---|
| 141 | td_for_each_disk(ptr->s, dd) { |
|---|
| 142 | if (dd->io_fd[READ]) |
|---|
| 143 | FD_SET(dd->io_fd[READ], readfds); |
|---|
| 144 | maxfds = (dd->io_fd[READ] > maxfds ? |
|---|
| 145 | dd->io_fd[READ] : maxfds); |
|---|
| 146 | } |
|---|
| 147 | maxfds = (ptr->tap_fd > maxfds ? ptr->tap_fd : maxfds); |
|---|
| 148 | } |
|---|
| 149 | ptr = ptr->next; |
|---|
| 150 | } |
|---|
| 151 | |
|---|
| 152 | return 0; |
|---|
| 153 | } |
|---|
| 154 | |
|---|
| 155 | static inline fd_list_entry_t *add_fd_entry(int tap_fd, struct td_state *s) |
|---|
| 156 | { |
|---|
| 157 | fd_list_entry_t **pprev, *entry; |
|---|
| 158 | int i; |
|---|
| 159 | |
|---|
| 160 | DPRINTF("Adding fd_list_entry\n"); |
|---|
| 161 | |
|---|
| 162 | /*Add to linked list*/ |
|---|
| 163 | s->fd_entry = entry = malloc(sizeof(fd_list_entry_t)); |
|---|
| 164 | entry->tap_fd = tap_fd; |
|---|
| 165 | entry->s = s; |
|---|
| 166 | entry->next = NULL; |
|---|
| 167 | |
|---|
| 168 | pprev = &fd_start; |
|---|
| 169 | while (*pprev != NULL) |
|---|
| 170 | pprev = &(*pprev)->next; |
|---|
| 171 | |
|---|
| 172 | *pprev = entry; |
|---|
| 173 | entry->pprev = pprev; |
|---|
| 174 | |
|---|
| 175 | return entry; |
|---|
| 176 | } |
|---|
| 177 | |
|---|
| 178 | static inline struct td_state *get_state(int cookie) |
|---|
| 179 | { |
|---|
| 180 | fd_list_entry_t *ptr; |
|---|
| 181 | |
|---|
| 182 | ptr = fd_start; |
|---|
| 183 | while (ptr != NULL) { |
|---|
| 184 | if (ptr->cookie == cookie) return ptr->s; |
|---|
| 185 | ptr = ptr->next; |
|---|
| 186 | } |
|---|
| 187 | return NULL; |
|---|
| 188 | } |
|---|
| 189 | |
|---|
| 190 | static struct tap_disk *get_driver(int drivertype) |
|---|
| 191 | { |
|---|
| 192 | /* blktapctrl has passed us the driver type */ |
|---|
| 193 | |
|---|
| 194 | return dtypes[drivertype]->drv; |
|---|
| 195 | } |
|---|
| 196 | |
|---|
| 197 | static struct td_state *state_init(void) |
|---|
| 198 | { |
|---|
| 199 | int i; |
|---|
| 200 | struct td_state *s; |
|---|
| 201 | blkif_t *blkif; |
|---|
| 202 | |
|---|
| 203 | s = malloc(sizeof(struct td_state)); |
|---|
| 204 | blkif = s->blkif = malloc(sizeof(blkif_t)); |
|---|
| 205 | s->ring_info = calloc(1, sizeof(tapdev_info_t)); |
|---|
| 206 | |
|---|
| 207 | for (i = 0; i < MAX_REQUESTS; i++) { |
|---|
| 208 | blkif->pending_list[i].secs_pending = 0; |
|---|
| 209 | blkif->pending_list[i].submitting = 0; |
|---|
| 210 | } |
|---|
| 211 | |
|---|
| 212 | return s; |
|---|
| 213 | } |
|---|
| 214 | |
|---|
| 215 | static int map_new_dev(struct td_state *s, int minor) |
|---|
| 216 | { |
|---|
| 217 | int tap_fd; |
|---|
| 218 | tapdev_info_t *info = s->ring_info; |
|---|
| 219 | char *devname; |
|---|
| 220 | fd_list_entry_t *ptr; |
|---|
| 221 | int page_size; |
|---|
| 222 | |
|---|
| 223 | asprintf(&devname,"%s/%s%d", BLKTAP_DEV_DIR, BLKTAP_DEV_NAME, minor); |
|---|
| 224 | tap_fd = open(devname, O_RDWR); |
|---|
| 225 | if (tap_fd == -1) |
|---|
| 226 | { |
|---|
| 227 | DPRINTF("open failed on dev %s!",devname); |
|---|
| 228 | goto fail; |
|---|
| 229 | } |
|---|
| 230 | info->fd = tap_fd; |
|---|
| 231 | |
|---|
| 232 | /*Map the shared memory*/ |
|---|
| 233 | page_size = getpagesize(); |
|---|
| 234 | info->mem = mmap(0, page_size * BLKTAP_MMAP_REGION_SIZE, |
|---|
| 235 | PROT_READ | PROT_WRITE, MAP_SHARED, info->fd, 0); |
|---|
| 236 | if ((long int)info->mem == -1) |
|---|
| 237 | { |
|---|
| 238 | DPRINTF("mmap failed on dev %s!\n",devname); |
|---|
| 239 | goto fail; |
|---|
| 240 | } |
|---|
| 241 | |
|---|
| 242 | /* assign the rings to the mapped memory */ |
|---|
| 243 | info->sring = (blkif_sring_t *)((unsigned long)info->mem); |
|---|
| 244 | BACK_RING_INIT(&info->fe_ring, info->sring, page_size); |
|---|
| 245 | |
|---|
| 246 | info->vstart = |
|---|
| 247 | (unsigned long)info->mem + (BLKTAP_RING_PAGES * page_size); |
|---|
| 248 | |
|---|
| 249 | ioctl(info->fd, BLKTAP_IOCTL_SENDPID, process ); |
|---|
| 250 | ioctl(info->fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_INTERPOSE ); |
|---|
| 251 | free(devname); |
|---|
| 252 | |
|---|
| 253 | /*Update the fd entry*/ |
|---|
| 254 | ptr = fd_start; |
|---|
| 255 | while (ptr != NULL) { |
|---|
| 256 | if (s == ptr->s) { |
|---|
| 257 | ptr->tap_fd = tap_fd; |
|---|
| 258 | break; |
|---|
| 259 | } |
|---|
| 260 | ptr = ptr->next; |
|---|
| 261 | } |
|---|
| 262 | |
|---|
| 263 | return minor; |
|---|
| 264 | |
|---|
| 265 | fail: |
|---|
| 266 | free(devname); |
|---|
| 267 | return -1; |
|---|
| 268 | } |
|---|
| 269 | |
|---|
| 270 | static struct disk_driver *disk_init(struct td_state *s, |
|---|
| 271 | struct tap_disk *drv, |
|---|
| 272 | char *name, td_flag_t flags) |
|---|
| 273 | { |
|---|
| 274 | struct disk_driver *dd; |
|---|
| 275 | |
|---|
| 276 | dd = calloc(1, sizeof(struct disk_driver)); |
|---|
| 277 | if (!dd) |
|---|
| 278 | return NULL; |
|---|
| 279 | |
|---|
| 280 | dd->private = malloc(drv->private_data_size); |
|---|
| 281 | if (!dd->private) { |
|---|
| 282 | free(dd); |
|---|
| 283 | return NULL; |
|---|
| 284 | } |
|---|
| 285 | |
|---|
| 286 | dd->drv = drv; |
|---|
| 287 | dd->td_state = s; |
|---|
| 288 | dd->name = name; |
|---|
| 289 | dd->flags = flags; |
|---|
| 290 | |
|---|
| 291 | return dd; |
|---|
| 292 | } |
|---|
| 293 | |
|---|
| 294 | static int open_disk(struct td_state *s, |
|---|
| 295 | struct tap_disk *drv, char *path, td_flag_t flags) |
|---|
| 296 | { |
|---|
| 297 | int err; |
|---|
| 298 | char *dup; |
|---|
| 299 | td_flag_t pflags; |
|---|
| 300 | struct disk_id id; |
|---|
| 301 | struct disk_driver *d; |
|---|
| 302 | |
|---|
| 303 | dup = strdup(path); |
|---|
| 304 | if (!dup) |
|---|
| 305 | return -ENOMEM; |
|---|
| 306 | |
|---|
| 307 | memset(&id, 0, sizeof(struct disk_id)); |
|---|
| 308 | s->disks = d = disk_init(s, drv, dup, flags); |
|---|
| 309 | if (!d) |
|---|
| 310 | return -ENOMEM; |
|---|
| 311 | |
|---|
| 312 | err = drv->td_open(d, path, flags); |
|---|
| 313 | if (err) { |
|---|
| 314 | free_driver(d); |
|---|
| 315 | s->disks = NULL; |
|---|
| 316 | return -ENOMEM; |
|---|
| 317 | } |
|---|
| 318 | pflags = flags | TD_RDONLY; |
|---|
| 319 | |
|---|
| 320 | /* load backing files as necessary */ |
|---|
| 321 | while ((err = d->drv->td_get_parent_id(d, &id)) == 0) { |
|---|
| 322 | struct disk_driver *new; |
|---|
| 323 | |
|---|
| 324 | if (id.drivertype > MAX_DISK_TYPES || |
|---|
| 325 | !get_driver(id.drivertype) || !id.name) |
|---|
| 326 | goto fail; |
|---|
| 327 | |
|---|
| 328 | dup = strdup(id.name); |
|---|
| 329 | if (!dup) |
|---|
| 330 | goto fail; |
|---|
| 331 | |
|---|
| 332 | new = disk_init(s, get_driver(id.drivertype), dup, pflags); |
|---|
| 333 | if (!new) |
|---|
| 334 | goto fail; |
|---|
| 335 | |
|---|
| 336 | err = new->drv->td_open(new, new->name, pflags); |
|---|
| 337 | if (err) |
|---|
| 338 | goto fail; |
|---|
| 339 | |
|---|
| 340 | err = d->drv->td_validate_parent(d, new, 0); |
|---|
| 341 | if (err) { |
|---|
| 342 | d->next = new; |
|---|
| 343 | goto fail; |
|---|
| 344 | } |
|---|
| 345 | |
|---|
| 346 | d = d->next = new; |
|---|
| 347 | free(id.name); |
|---|
| 348 | } |
|---|
| 349 | |
|---|
| 350 | s->info |= ((flags & TD_RDONLY) ? VDISK_READONLY : 0); |
|---|
| 351 | |
|---|
| 352 | if (err >= 0) |
|---|
| 353 | return 0; |
|---|
| 354 | |
|---|
| 355 | fail: |
|---|
| 356 | DPRINTF("failed opening disk\n"); |
|---|
| 357 | if (id.name) |
|---|
| 358 | free(id.name); |
|---|
| 359 | d = s->disks; |
|---|
| 360 | while (d) { |
|---|
| 361 | struct disk_driver *tmp = d->next; |
|---|
| 362 | d->drv->td_close(d); |
|---|
| 363 | free_driver(d); |
|---|
| 364 | d = tmp; |
|---|
| 365 | } |
|---|
| 366 | s->disks = NULL; |
|---|
| 367 | return -1; |
|---|
| 368 | } |
|---|
| 369 | |
|---|
| 370 | static int read_msg(char *buf) |
|---|
| 371 | { |
|---|
| 372 | int length, len, msglen, tap_fd, *io_fd; |
|---|
| 373 | char *ptr, *path; |
|---|
| 374 | image_t *img; |
|---|
| 375 | msg_hdr_t *msg; |
|---|
| 376 | msg_newdev_t *msg_dev; |
|---|
| 377 | msg_pid_t *msg_pid; |
|---|
| 378 | struct tap_disk *drv; |
|---|
| 379 | int ret = -1; |
|---|
| 380 | struct td_state *s = NULL; |
|---|
| 381 | fd_list_entry_t *entry; |
|---|
| 382 | |
|---|
| 383 | length = read(fds[READ], buf, MSG_SIZE); |
|---|
| 384 | |
|---|
| 385 | if (length > 0 && length >= sizeof(msg_hdr_t)) |
|---|
| 386 | { |
|---|
| 387 | msg = (msg_hdr_t *)buf; |
|---|
| 388 | DPRINTF("Tapdisk: Received msg, len %d, type %d, UID %d\n", |
|---|
| 389 | length,msg->type,msg->cookie); |
|---|
| 390 | |
|---|
| 391 | switch (msg->type) { |
|---|
| 392 | case CTLMSG_PARAMS: |
|---|
| 393 | ptr = buf + sizeof(msg_hdr_t); |
|---|
| 394 | len = (length - sizeof(msg_hdr_t)); |
|---|
| 395 | path = calloc(1, len); |
|---|
| 396 | |
|---|
| 397 | memcpy(path, ptr, len); |
|---|
| 398 | DPRINTF("Received CTLMSG_PARAMS: [%s]\n", path); |
|---|
| 399 | |
|---|
| 400 | /*Assign driver*/ |
|---|
| 401 | drv = get_driver(msg->drivertype); |
|---|
| 402 | if (drv == NULL) |
|---|
| 403 | goto params_done; |
|---|
| 404 | |
|---|
| 405 | DPRINTF("Loaded driver: name [%s], type [%d]\n", |
|---|
| 406 | drv->disk_type, msg->drivertype); |
|---|
| 407 | |
|---|
| 408 | /* Allocate the disk structs */ |
|---|
| 409 | s = state_init(); |
|---|
| 410 | if (s == NULL) |
|---|
| 411 | goto params_done; |
|---|
| 412 | |
|---|
| 413 | /*Open file*/ |
|---|
| 414 | ret = open_disk(s, drv, path, |
|---|
| 415 | ((msg->readonly) ? TD_RDONLY : 0)); |
|---|
| 416 | if (ret) |
|---|
| 417 | goto params_done; |
|---|
| 418 | |
|---|
| 419 | entry = add_fd_entry(0, s); |
|---|
| 420 | entry->cookie = msg->cookie; |
|---|
| 421 | DPRINTF("Entered cookie %d\n", entry->cookie); |
|---|
| 422 | |
|---|
| 423 | memset(buf, 0x00, MSG_SIZE); |
|---|
| 424 | |
|---|
| 425 | params_done: |
|---|
| 426 | if (ret == 0) { |
|---|
| 427 | msglen = sizeof(msg_hdr_t) + sizeof(image_t); |
|---|
| 428 | msg->type = CTLMSG_IMG; |
|---|
| 429 | img = (image_t *)(buf + sizeof(msg_hdr_t)); |
|---|
| 430 | img->size = s->size; |
|---|
| 431 | img->secsize = s->sector_size; |
|---|
| 432 | img->info = s->info; |
|---|
| 433 | } else { |
|---|
| 434 | msglen = sizeof(msg_hdr_t); |
|---|
| 435 | msg->type = CTLMSG_IMG_FAIL; |
|---|
| 436 | msg->len = msglen; |
|---|
| 437 | } |
|---|
| 438 | len = write(fds[WRITE], buf, msglen); |
|---|
| 439 | free(path); |
|---|
| 440 | return 1; |
|---|
| 441 | |
|---|
| 442 | case CTLMSG_NEWDEV: |
|---|
| 443 | msg_dev = (msg_newdev_t *)(buf + sizeof(msg_hdr_t)); |
|---|
| 444 | |
|---|
| 445 | s = get_state(msg->cookie); |
|---|
| 446 | DPRINTF("Retrieving state, cookie %d.....[%s]\n", |
|---|
| 447 | msg->cookie, (s == NULL ? "FAIL":"OK")); |
|---|
| 448 | if (s != NULL) { |
|---|
| 449 | ret = ((map_new_dev(s, msg_dev->devnum) |
|---|
| 450 | == msg_dev->devnum ? 0: -1)); |
|---|
| 451 | connected_disks++; |
|---|
| 452 | } |
|---|
| 453 | |
|---|
| 454 | memset(buf, 0x00, MSG_SIZE); |
|---|
| 455 | msglen = sizeof(msg_hdr_t); |
|---|
| 456 | msg->type = (ret == 0 ? CTLMSG_NEWDEV_RSP |
|---|
| 457 | : CTLMSG_NEWDEV_FAIL); |
|---|
| 458 | msg->len = msglen; |
|---|
| 459 | |
|---|
| 460 | len = write(fds[WRITE], buf, msglen); |
|---|
| 461 | return 1; |
|---|
| 462 | |
|---|
| 463 | case CTLMSG_CLOSE: |
|---|
| 464 | s = get_state(msg->cookie); |
|---|
| 465 | if (s) unmap_disk(s); |
|---|
| 466 | |
|---|
| 467 | connected_disks--; |
|---|
| 468 | sig_handler(SIGINT); |
|---|
| 469 | |
|---|
| 470 | return 1; |
|---|
| 471 | |
|---|
| 472 | case CTLMSG_PID: |
|---|
| 473 | memset(buf, 0x00, MSG_SIZE); |
|---|
| 474 | msglen = sizeof(msg_hdr_t) + sizeof(msg_pid_t); |
|---|
| 475 | msg->type = CTLMSG_PID_RSP; |
|---|
| 476 | msg->len = msglen; |
|---|
| 477 | |
|---|
| 478 | msg_pid = (msg_pid_t *)(buf + sizeof(msg_hdr_t)); |
|---|
| 479 | process = getpid(); |
|---|
| 480 | msg_pid->pid = process; |
|---|
| 481 | |
|---|
| 482 | len = write(fds[WRITE], buf, msglen); |
|---|
| 483 | return 1; |
|---|
| 484 | |
|---|
| 485 | default: |
|---|
| 486 | return 0; |
|---|
| 487 | } |
|---|
| 488 | } |
|---|
| 489 | return 0; |
|---|
| 490 | } |
|---|
| 491 | |
|---|
| 492 | static inline int write_rsp_to_ring(struct td_state *s, blkif_response_t *rsp) |
|---|
| 493 | { |
|---|
| 494 | tapdev_info_t *info = s->ring_info; |
|---|
| 495 | blkif_response_t *rsp_d; |
|---|
| 496 | |
|---|
| 497 | rsp_d = RING_GET_RESPONSE(&info->fe_ring, info->fe_ring.rsp_prod_pvt); |
|---|
| 498 | memcpy(rsp_d, rsp, sizeof(blkif_response_t)); |
|---|
| 499 | info->fe_ring.rsp_prod_pvt++; |
|---|
| 500 | |
|---|
| 501 | return 0; |
|---|
| 502 | } |
|---|
| 503 | |
|---|
| 504 | static inline void kick_responses(struct td_state *s) |
|---|
| 505 | { |
|---|
| 506 | tapdev_info_t *info = s->ring_info; |
|---|
| 507 | |
|---|
| 508 | if (info->fe_ring.rsp_prod_pvt != info->fe_ring.sring->rsp_prod) |
|---|
| 509 | { |
|---|
| 510 | RING_PUSH_RESPONSES(&info->fe_ring); |
|---|
| 511 | ioctl(info->fd, BLKTAP_IOCTL_KICK_FE); |
|---|
| 512 | } |
|---|
| 513 | } |
|---|
| 514 | |
|---|
| 515 | void io_done(struct disk_driver *dd, int sid) |
|---|
| 516 | { |
|---|
| 517 | struct tap_disk *drv = dd->drv; |
|---|
| 518 | |
|---|
| 519 | if (!run) return; /*We have received signal to close*/ |
|---|
| 520 | |
|---|
| 521 | if (sid > MAX_IOFD || drv->td_do_callbacks(dd, sid) > 0) |
|---|
| 522 | kick_responses(dd->td_state); |
|---|
| 523 | |
|---|
| 524 | return; |
|---|
| 525 | } |
|---|
| 526 | |
|---|
| 527 | static inline uint64_t |
|---|
| 528 | segment_start(blkif_request_t *req, int sidx) |
|---|
| 529 | { |
|---|
| 530 | int i; |
|---|
| 531 | uint64_t start = req->sector_number; |
|---|
| 532 | |
|---|
| 533 | for (i = 0; i < sidx; i++) |
|---|
| 534 | start += (req->seg[i].last_sect - req->seg[i].first_sect + 1); |
|---|
| 535 | |
|---|
| 536 | return start; |
|---|
| 537 | } |
|---|
| 538 | |
|---|
| 539 | uint64_t sends, responds; |
|---|
| 540 | int send_responses(struct disk_driver *dd, int res, |
|---|
| 541 | uint64_t sector, int nr_secs, int idx, void *private) |
|---|
| 542 | { |
|---|
| 543 | pending_req_t *preq; |
|---|
| 544 | blkif_request_t *req; |
|---|
| 545 | int responses_queued = 0; |
|---|
| 546 | struct td_state *s = dd->td_state; |
|---|
| 547 | blkif_t *blkif = s->blkif; |
|---|
| 548 | int sidx = (int)(long)private, secs_done = nr_secs; |
|---|
| 549 | |
|---|
| 550 | if ( (idx > MAX_REQUESTS-1) ) |
|---|
| 551 | { |
|---|
| 552 | DPRINTF("invalid index returned(%u)!\n", idx); |
|---|
| 553 | return 0; |
|---|
| 554 | } |
|---|
| 555 | preq = &blkif->pending_list[idx]; |
|---|
| 556 | req = &preq->req; |
|---|
| 557 | |
|---|
| 558 | if (res == BLK_NOT_ALLOCATED) { |
|---|
| 559 | res = do_cow_read(dd, req, sidx, sector, nr_secs); |
|---|
| 560 | if (res >= 0) { |
|---|
| 561 | secs_done = res; |
|---|
| 562 | res = 0; |
|---|
| 563 | } else |
|---|
| 564 | secs_done = 0; |
|---|
| 565 | } |
|---|
| 566 | |
|---|
| 567 | preq->secs_pending -= secs_done; |
|---|
| 568 | |
|---|
| 569 | if (res == -EBUSY && preq->submitting) |
|---|
| 570 | return -EBUSY; /* propagate -EBUSY back to higher layers */ |
|---|
| 571 | if (res) |
|---|
| 572 | preq->status = BLKIF_RSP_ERROR; |
|---|
| 573 | |
|---|
| 574 | if (!preq->submitting && preq->secs_pending == 0) |
|---|
| 575 | { |
|---|
| 576 | blkif_request_t tmp; |
|---|
| 577 | blkif_response_t *rsp; |
|---|
| 578 | |
|---|
| 579 | tmp = preq->req; |
|---|
| 580 | rsp = (blkif_response_t *)req; |
|---|
| 581 | |
|---|
| 582 | rsp->id = tmp.id; |
|---|
| 583 | rsp->operation = tmp.operation; |
|---|
| 584 | rsp->status = preq->status; |
|---|
| 585 | |
|---|
| 586 | write_rsp_to_ring(s, rsp); |
|---|
| 587 | responses_queued++; |
|---|
| 588 | } |
|---|
| 589 | return responses_queued; |
|---|
| 590 | } |
|---|
| 591 | |
|---|
| 592 | int do_cow_read(struct disk_driver *dd, blkif_request_t *req, |
|---|
| 593 | int sidx, uint64_t sector, int nr_secs) |
|---|
| 594 | { |
|---|
| 595 | char *page; |
|---|
| 596 | int ret, early; |
|---|
| 597 | uint64_t seg_start, seg_end; |
|---|
| 598 | struct td_state *s = dd->td_state; |
|---|
| 599 | tapdev_info_t *info = s->ring_info; |
|---|
| 600 | struct disk_driver *parent = dd->next; |
|---|
| 601 | |
|---|
| 602 | seg_start = segment_start(req, sidx); |
|---|
| 603 | seg_end = seg_start + req->seg[sidx].last_sect + 1; |
|---|
| 604 | |
|---|
| 605 | ASSERT(sector >= seg_start && sector + nr_secs <= seg_end); |
|---|
| 606 | |
|---|
| 607 | page = (char *)MMAP_VADDR(info->vstart, |
|---|
| 608 | (unsigned long)req->id, sidx); |
|---|
| 609 | page += (req->seg[sidx].first_sect << SECTOR_SHIFT); |
|---|
| 610 | page += ((sector - seg_start) << SECTOR_SHIFT); |
|---|
| 611 | |
|---|
| 612 | if (!parent) { |
|---|
| 613 | memset(page, 0, nr_secs << SECTOR_SHIFT); |
|---|
| 614 | return nr_secs; |
|---|
| 615 | } |
|---|
| 616 | |
|---|
| 617 | /* reissue request to backing file */ |
|---|
| 618 | ret = parent->drv->td_queue_read(parent, sector, nr_secs, |
|---|
| 619 | page, send_responses, |
|---|
| 620 | req->id, (void *)(long)sidx); |
|---|
| 621 | if (ret > 0) |
|---|
| 622 | parent->early += ret; |
|---|
| 623 | |
|---|
| 624 | return ((ret >= 0) ? 0 : ret); |
|---|
| 625 | } |
|---|
| 626 | |
|---|
| 627 | static void get_io_request(struct td_state *s) |
|---|
| 628 | { |
|---|
| 629 | RING_IDX rp, rc, j, i; |
|---|
| 630 | blkif_request_t *req; |
|---|
| 631 | int idx, nsects, ret; |
|---|
| 632 | uint64_t sector_nr; |
|---|
| 633 | char *page; |
|---|
| 634 | int early = 0; /* count early completions */ |
|---|
| 635 | struct disk_driver *dd = s->disks; |
|---|
| 636 | struct tap_disk *drv = dd->drv; |
|---|
| 637 | blkif_t *blkif = s->blkif; |
|---|
| 638 | tapdev_info_t *info = s->ring_info; |
|---|
| 639 | int page_size = getpagesize(); |
|---|
| 640 | |
|---|
| 641 | if (!run) return; /*We have received signal to close*/ |
|---|
| 642 | |
|---|
| 643 | rp = info->fe_ring.sring->req_prod; |
|---|
| 644 | rmb(); |
|---|
| 645 | for (j = info->fe_ring.req_cons; j != rp; j++) |
|---|
| 646 | { |
|---|
| 647 | int done = 0, start_seg = 0; |
|---|
| 648 | |
|---|
| 649 | req = NULL; |
|---|
| 650 | req = RING_GET_REQUEST(&info->fe_ring, j); |
|---|
| 651 | ++info->fe_ring.req_cons; |
|---|
| 652 | |
|---|
| 653 | if (req == NULL) continue; |
|---|
| 654 | |
|---|
| 655 | idx = req->id; |
|---|
| 656 | |
|---|
| 657 | if (info->busy.req) { |
|---|
| 658 | /* continue where we left off last time */ |
|---|
| 659 | ASSERT(info->busy.req == req); |
|---|
| 660 | start_seg = info->busy.seg_idx; |
|---|
| 661 | sector_nr = segment_start(req, start_seg); |
|---|
| 662 | info->busy.seg_idx = 0; |
|---|
| 663 | info->busy.req = NULL; |
|---|
| 664 | } else { |
|---|
| 665 | ASSERT(blkif->pending_list[idx].secs_pending == 0); |
|---|
| 666 | memcpy(&blkif->pending_list[idx].req, |
|---|
| 667 | req, sizeof(*req)); |
|---|
| 668 | blkif->pending_list[idx].status = BLKIF_RSP_OKAY; |
|---|
| 669 | blkif->pending_list[idx].submitting = 1; |
|---|
| 670 | sector_nr = req->sector_number; |
|---|
| 671 | } |
|---|
| 672 | |
|---|
| 673 | if ((dd->flags & TD_RDONLY) && |
|---|
| 674 | (req->operation == BLKIF_OP_WRITE)) { |
|---|
| 675 | blkif->pending_list[idx].status = BLKIF_RSP_ERROR; |
|---|
| 676 | goto send_response; |
|---|
| 677 | } |
|---|
| 678 | |
|---|
| 679 | for (i = start_seg; i < req->nr_segments; i++) { |
|---|
| 680 | nsects = req->seg[i].last_sect - |
|---|
| 681 | req->seg[i].first_sect + 1; |
|---|
| 682 | |
|---|
| 683 | if ((req->seg[i].last_sect >= page_size >> 9) || |
|---|
| 684 | (nsects <= 0)) |
|---|
| 685 | continue; |
|---|
| 686 | |
|---|
| 687 | page = (char *)MMAP_VADDR(info->vstart, |
|---|
| 688 | (unsigned long)req->id, i); |
|---|
| 689 | page += (req->seg[i].first_sect << SECTOR_SHIFT); |
|---|
| 690 | |
|---|
| 691 | if (sector_nr >= s->size) { |
|---|
| 692 | DPRINTF("Sector request failed:\n"); |
|---|
| 693 | DPRINTF("%s request, idx [%d,%d] size [%llu], " |
|---|
| 694 | "sector [%llu,%llu]\n", |
|---|
| 695 | (req->operation == BLKIF_OP_WRITE ? |
|---|
| 696 | "WRITE" : "READ"), |
|---|
| 697 | idx,i, |
|---|
| 698 | (long long unsigned) |
|---|
| 699 | nsects<<SECTOR_SHIFT, |
|---|
| 700 | (long long unsigned) |
|---|
| 701 | sector_nr<<SECTOR_SHIFT, |
|---|
| 702 | (long long unsigned) sector_nr); |
|---|
| 703 | continue; |
|---|
| 704 | } |
|---|
| 705 | |
|---|
| 706 | blkif->pending_list[idx].secs_pending += nsects; |
|---|
| 707 | |
|---|
| 708 | switch (req->operation) |
|---|
| 709 | { |
|---|
| 710 | case BLKIF_OP_WRITE: |
|---|
| 711 | ret = drv->td_queue_write(dd, sector_nr, |
|---|
| 712 | nsects, page, |
|---|
| 713 | send_responses, |
|---|
| 714 | idx, (void *)(long)i); |
|---|
| 715 | if (ret > 0) dd->early += ret; |
|---|
| 716 | else if (ret == -EBUSY) { |
|---|
| 717 | /* put req back on queue */ |
|---|
| 718 | --info->fe_ring.req_cons; |
|---|
| 719 | info->busy.req = req; |
|---|
| 720 | info->busy.seg_idx = i; |
|---|
| 721 | goto out; |
|---|
| 722 | } |
|---|
| 723 | break; |
|---|
| 724 | case BLKIF_OP_READ: |
|---|
| 725 | ret = drv->td_queue_read(dd, sector_nr, |
|---|
| 726 | nsects, page, |
|---|
| 727 | send_responses, |
|---|
| 728 | idx, (void *)(long)i); |
|---|
| 729 | if (ret > 0) dd->early += ret; |
|---|
| 730 | else if (ret == -EBUSY) { |
|---|
| 731 | /* put req back on queue */ |
|---|
| 732 | --info->fe_ring.req_cons; |
|---|
| 733 | info->busy.req = req; |
|---|
| 734 | info->busy.seg_idx = i; |
|---|
| 735 | goto out; |
|---|
| 736 | } |
|---|
| 737 | break; |
|---|
| 738 | default: |
|---|
| 739 | DPRINTF("Unknown block operation\n"); |
|---|
| 740 | break; |
|---|
| 741 | } |
|---|
| 742 | sector_nr += nsects; |
|---|
| 743 | } |
|---|
| 744 | send_response: |
|---|
| 745 | blkif->pending_list[idx].submitting = 0; |
|---|
| 746 | /* force write_rsp_to_ring for synchronous case */ |
|---|
| 747 | if (blkif->pending_list[idx].secs_pending == 0) |
|---|
| 748 | dd->early += send_responses(dd, 0, 0, 0, idx, |
|---|
| 749 | (void *)(long)0); |
|---|
| 750 | } |
|---|
| 751 | |
|---|
| 752 | out: |
|---|
| 753 | /*Batch done*/ |
|---|
| 754 | td_for_each_disk(s, dd) { |
|---|
| 755 | dd->early += dd->drv->td_submit(dd); |
|---|
| 756 | if (dd->early > 0) { |
|---|
| 757 | io_done(dd, MAX_IOFD + 1); |
|---|
| 758 | dd->early = 0; |
|---|
| 759 | } |
|---|
| 760 | } |
|---|
| 761 | |
|---|
| 762 | return; |
|---|
| 763 | } |
|---|
| 764 | |
|---|
| 765 | int main(int argc, char *argv[]) |
|---|
| 766 | { |
|---|
| 767 | int len, msglen, ret; |
|---|
| 768 | char *p, *buf; |
|---|
| 769 | fd_set readfds, writefds; |
|---|
| 770 | fd_list_entry_t *ptr; |
|---|
| 771 | struct td_state *s; |
|---|
| 772 | char openlogbuf[128]; |
|---|
| 773 | |
|---|
| 774 | if (argc != 3) usage(); |
|---|
| 775 | |
|---|
| 776 | daemonize(); |
|---|
| 777 | |
|---|
| 778 | snprintf(openlogbuf, sizeof(openlogbuf), "TAPDISK[%d]", getpid()); |
|---|
| 779 | openlog(openlogbuf, LOG_CONS|LOG_ODELAY, LOG_DAEMON); |
|---|
| 780 | /*Setup signal handlers*/ |
|---|
| 781 | signal (SIGBUS, sig_handler); |
|---|
| 782 | signal (SIGINT, sig_handler); |
|---|
| 783 | |
|---|
| 784 | /*Open the control channel*/ |
|---|
| 785 | fds[READ] = open(argv[1],O_RDWR|O_NONBLOCK); |
|---|
| 786 | fds[WRITE] = open(argv[2],O_RDWR|O_NONBLOCK); |
|---|
| 787 | |
|---|
| 788 | if ( (fds[READ] < 0) || (fds[WRITE] < 0) ) |
|---|
| 789 | { |
|---|
| 790 | DPRINTF("FD open failed [%d,%d]\n", fds[READ], fds[WRITE]); |
|---|
| 791 | exit(-1); |
|---|
| 792 | } |
|---|
| 793 | |
|---|
| 794 | buf = calloc(MSG_SIZE, 1); |
|---|
| 795 | |
|---|
| 796 | if (buf == NULL) |
|---|
| 797 | { |
|---|
| 798 | DPRINTF("ERROR: allocating memory.\n"); |
|---|
| 799 | exit(-1); |
|---|
| 800 | } |
|---|
| 801 | |
|---|
| 802 | while (run) |
|---|
| 803 | { |
|---|
| 804 | ret = 0; |
|---|
| 805 | FD_ZERO(&readfds); |
|---|
| 806 | FD_SET(fds[READ], &readfds); |
|---|
| 807 | maxfds = fds[READ]; |
|---|
| 808 | |
|---|
| 809 | /*Set all tap fds*/ |
|---|
| 810 | LOCAL_FD_SET(&readfds); |
|---|
| 811 | |
|---|
| 812 | /*Wait for incoming messages*/ |
|---|
| 813 | ret = select(maxfds + 1, &readfds, (fd_set *) 0, |
|---|
| 814 | (fd_set *) 0, NULL); |
|---|
| 815 | |
|---|
| 816 | if (ret > 0) |
|---|
| 817 | { |
|---|
| 818 | ptr = fd_start; |
|---|
| 819 | while (ptr != NULL) { |
|---|
| 820 | int progress_made = 0; |
|---|
| 821 | struct disk_driver *dd; |
|---|
| 822 | tapdev_info_t *info = ptr->s->ring_info; |
|---|
| 823 | |
|---|
| 824 | td_for_each_disk(ptr->s, dd) { |
|---|
| 825 | if (dd->io_fd[READ] && |
|---|
| 826 | FD_ISSET(dd->io_fd[READ], |
|---|
| 827 | &readfds)) { |
|---|
| 828 | io_done(dd, READ); |
|---|
| 829 | progress_made = 1; |
|---|
| 830 | } |
|---|
| 831 | } |
|---|
| 832 | |
|---|
| 833 | /* completed io from above may have |
|---|
| 834 | * queued new requests on chained disks */ |
|---|
| 835 | if (progress_made) { |
|---|
| 836 | td_for_each_disk(ptr->s, dd) { |
|---|
| 837 | dd->early += |
|---|
| 838 | dd->drv->td_submit(dd); |
|---|
| 839 | if (dd->early > 0) { |
|---|
| 840 | io_done(dd, |
|---|
| 841 | MAX_IOFD + 1); |
|---|
| 842 | dd->early = 0; |
|---|
| 843 | } |
|---|
| 844 | } |
|---|
| 845 | } |
|---|
| 846 | |
|---|
| 847 | if (FD_ISSET(ptr->tap_fd, &readfds) || |
|---|
| 848 | (info->busy.req && progress_made)) |
|---|
| 849 | get_io_request(ptr->s); |
|---|
| 850 | |
|---|
| 851 | ptr = ptr->next; |
|---|
| 852 | } |
|---|
| 853 | |
|---|
| 854 | if (FD_ISSET(fds[READ], &readfds)) |
|---|
| 855 | read_msg(buf); |
|---|
| 856 | } |
|---|
| 857 | } |
|---|
| 858 | free(buf); |
|---|
| 859 | close(fds[READ]); |
|---|
| 860 | close(fds[WRITE]); |
|---|
| 861 | |
|---|
| 862 | ptr = fd_start; |
|---|
| 863 | while (ptr != NULL) { |
|---|
| 864 | s = ptr->s; |
|---|
| 865 | |
|---|
| 866 | unmap_disk(s); |
|---|
| 867 | free(s->blkif); |
|---|
| 868 | free(s->ring_info); |
|---|
| 869 | free(s); |
|---|
| 870 | close(ptr->tap_fd); |
|---|
| 871 | ptr = ptr->next; |
|---|
| 872 | } |
|---|
| 873 | closelog(); |
|---|
| 874 | |
|---|
| 875 | return 0; |
|---|
| 876 | } |
|---|