drivers/block/xen-blkfront.c
1 /*
2 * blkfront.c
3 *
4 * XenLinux virtual block device driver.
5 *
6 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
7 * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
8 * Copyright (c) 2004, Christian Limpach
9 * Copyright (c) 2004, Andrew Warfield
10 * Copyright (c) 2005, Christopher Clark
11 * Copyright (c) 2005, XenSource Ltd
12 *
13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License version 2
15 * as published by the Free Software Foundation; or, when distributed
16 * separately from the Linux kernel or incorporated into other
17 * software packages, subject to the following license:
18 *
19 * Permission is hereby granted, free of charge, to any person obtaining a copy
20 * of this source file (the "Software"), to deal in the Software without
21 * restriction, including without limitation the rights to use, copy, modify,
22 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
23 * and to permit persons to whom the Software is furnished to do so, subject to
24 * the following conditions:
25 *
26 * The above copyright notice and this permission notice shall be included in
27 * all copies or substantial portions of the Software.
28 *
29 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
30 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
31 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
32 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
33 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
34 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
35 * IN THE SOFTWARE.
36 */
37
38 #include <linux/interrupt.h>
39 #include <linux/blkdev.h>
40 #include <linux/hdreg.h>
41 #include <linux/cdrom.h>
42 #include <linux/module.h>
43 #include <linux/slab.h>
44 #include <linux/mutex.h>
45 #include <linux/scatterlist.h>
46 #include <linux/bitmap.h>
47 #include <linux/list.h>
48
49 #include <xen/xen.h>
50 #include <xen/xenbus.h>
51 #include <xen/grant_table.h>
52 #include <xen/events.h>
53 #include <xen/page.h>
54 #include <xen/platform_pci.h>
55
56 #include <xen/interface/grant_table.h>
57 #include <xen/interface/io/blkif.h>
58 #include <xen/interface/io/protocols.h>
59
60 #include <asm/xen/hypervisor.h>
61
62 enum blkif_state {
63 BLKIF_STATE_DISCONNECTED,
64 BLKIF_STATE_CONNECTED,
65 BLKIF_STATE_SUSPENDED,
66 };
67
68 struct grant {
69 grant_ref_t gref;
70 unsigned long pfn;
71 struct list_head node;
72 };
73
74 struct blk_shadow {
75 struct blkif_request req;
76 struct request *request;
77 struct grant *grants_used[BLKIF_MAX_SEGMENTS_PER_REQUEST];
78 struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
79 };
80
81 static DEFINE_MUTEX(blkfront_mutex);
82 static const struct block_device_operations xlvbd_block_fops;
83
84 #define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
85
86 /*
87 * We have one of these per vbd, whether ide, scsi or 'other'. They
88 * hang in private_data off the gendisk structure. We may end up
89 * putting all kinds of interesting stuff here :-)
90 */
91 struct blkfront_info
92 {
93 spinlock_t io_lock;
94 struct mutex mutex;
95 struct xenbus_device *xbdev;
96 struct gendisk *gd;
97 int vdevice;
98 blkif_vdev_t handle;
99 enum blkif_state connected;
100 int ring_ref;
101 struct blkif_front_ring ring;
102 unsigned int evtchn, irq;
103 struct request_queue *rq;
104 struct work_struct work;
105 struct gnttab_free_callback callback;
106 struct blk_shadow shadow[BLK_RING_SIZE];
107 struct list_head grants;
108 unsigned int persistent_gnts_c;
109 unsigned long shadow_free;
110 unsigned int feature_flush;
111 unsigned int flush_op;
112 unsigned int feature_discard:1;
113 unsigned int feature_secdiscard:1;
114 unsigned int discard_granularity;
115 unsigned int discard_alignment;
116 unsigned int feature_persistent:1;
117 int is_ready;
118 };
119
120 static unsigned int nr_minors;
121 static unsigned long *minors;
122 static DEFINE_SPINLOCK(minor_lock);
123
124 #define MAXIMUM_OUTSTANDING_BLOCK_REQS \
125 (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
126 #define GRANT_INVALID_REF 0
127
128 #define PARTS_PER_DISK 16
129 #define PARTS_PER_EXT_DISK 256
130
131 #define BLKIF_MAJOR(dev) ((dev)>>8)
132 #define BLKIF_MINOR(dev) ((dev) & 0xff)
133
134 #define EXT_SHIFT 28
135 #define EXTENDED (1<<EXT_SHIFT)
136 #define VDEV_IS_EXTENDED(dev) ((dev)&(EXTENDED))
137 #define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED))
138 #define EMULATED_HD_DISK_MINOR_OFFSET (0)
139 #define EMULATED_HD_DISK_NAME_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET / 256)
140 #define EMULATED_SD_DISK_MINOR_OFFSET (0)
141 #define EMULATED_SD_DISK_NAME_OFFSET (EMULATED_SD_DISK_MINOR_OFFSET / 256)
142
143 #define DEV_NAME "xvd" /* name in /dev */
144
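/*
 * The shadow ring doubles as a free list: unused entries are chained
 * through req.u.rw.id, with info->shadow_free pointing at the first
 * free slot.  get_id_from_freelist() pops a slot for a new request and
 * add_id_to_freelist() returns it once the response has been handled.
 */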
145 static int get_id_from_freelist(struct blkfront_info *info)
146 {
147 unsigned long free = info->shadow_free;
148 BUG_ON(free >= BLK_RING_SIZE);
149 info->shadow_free = info->shadow[free].req.u.rw.id;
150 info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */
151 return free;
152 }
153
154 static int add_id_to_freelist(struct blkfront_info *info,
155 unsigned long id)
156 {
157 if (info->shadow[id].req.u.rw.id != id)
158 return -EINVAL;
159 if (info->shadow[id].request == NULL)
160 return -EINVAL;
161 info->shadow[id].req.u.rw.id = info->shadow_free;
162 info->shadow[id].request = NULL;
163 info->shadow_free = id;
164 return 0;
165 }
166
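/*
 * Pre-allocate 'num' grant tracking structures on info->grants.  When
 * persistent grants are in use each entry also gets its own backing
 * page; otherwise the page is supplied by the request at map time.
 */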
167 static int fill_grant_buffer(struct blkfront_info *info, int num)
168 {
169 struct page *granted_page;
170 struct grant *gnt_list_entry, *n;
171 int i = 0;
172
173 while(i < num) {
174 gnt_list_entry = kzalloc(sizeof(struct grant), GFP_NOIO);
175 if (!gnt_list_entry)
176 goto out_of_memory;
177
178 if (info->feature_persistent) {
179 granted_page = alloc_page(GFP_NOIO);
180 if (!granted_page) {
181 kfree(gnt_list_entry);
182 goto out_of_memory;
183 }
184 gnt_list_entry->pfn = page_to_pfn(granted_page);
185 }
186
187 gnt_list_entry->gref = GRANT_INVALID_REF;
188 list_add(&gnt_list_entry->node, &info->grants);
189 i++;
190 }
191
192 return 0;
193
194 out_of_memory:
195 list_for_each_entry_safe(gnt_list_entry, n,
196 &info->grants, node) {
197 list_del(&gnt_list_entry->node);
198 if (info->feature_persistent)
199 __free_page(pfn_to_page(gnt_list_entry->pfn));
200 kfree(gnt_list_entry);
201 i--;
202 }
203 BUG_ON(i != 0);
204 return -ENOMEM;
205 }
206
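/*
 * Take the first entry off info->grants.  A still-valid gref means we
 * are reusing a persistent grant; otherwise claim a fresh grant
 * reference and grant the backend access to the page at 'pfn' (or to
 * the entry's own page when persistent grants are enabled).
 */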
207 static struct grant *get_grant(grant_ref_t *gref_head,
208 unsigned long pfn,
209 struct blkfront_info *info)
210 {
211 struct grant *gnt_list_entry;
212 unsigned long buffer_mfn;
213
214 BUG_ON(list_empty(&info->grants));
215 gnt_list_entry = list_first_entry(&info->grants, struct grant, node);
216 list_del(&gnt_list_entry->node);
217
218 if (gnt_list_entry->gref != GRANT_INVALID_REF) {
219 info->persistent_gnts_c--;
220 return gnt_list_entry;
221 }
222
223 /* Assign a gref to this page */
224 gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head);
225 BUG_ON(gnt_list_entry->gref == -ENOSPC);
226 if (!info->feature_persistent) {
227 BUG_ON(!pfn);
228 gnt_list_entry->pfn = pfn;
229 }
230 buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn);
231 gnttab_grant_foreign_access_ref(gnt_list_entry->gref,
232 info->xbdev->otherend_id,
233 buffer_mfn, 0);
234 return gnt_list_entry;
235 }
236
237 static const char *op_name(int op)
238 {
239 static const char *const names[] = {
240 [BLKIF_OP_READ] = "read",
241 [BLKIF_OP_WRITE] = "write",
242 [BLKIF_OP_WRITE_BARRIER] = "barrier",
243 [BLKIF_OP_FLUSH_DISKCACHE] = "flush",
244 [BLKIF_OP_DISCARD] = "discard" };
245
246 if (op < 0 || op >= ARRAY_SIZE(names))
247 return "unknown";
248
249 if (!names[op])
250 return "reserved";
251
252 return names[op];
253 }
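/*
 * Minor numbers are tracked in a bitmap that is grown on demand;
 * xlbd_reserve_minors() fails with -EBUSY if any minor in the
 * requested range is already taken.
 */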
254 static int xlbd_reserve_minors(unsigned int minor, unsigned int nr)
255 {
256 unsigned int end = minor + nr;
257 int rc;
258
259 if (end > nr_minors) {
260 unsigned long *bitmap, *old;
261
262 bitmap = kcalloc(BITS_TO_LONGS(end), sizeof(*bitmap),
263 GFP_KERNEL);
264 if (bitmap == NULL)
265 return -ENOMEM;
266
267 spin_lock(&minor_lock);
268 if (end > nr_minors) {
269 old = minors;
270 memcpy(bitmap, minors,
271 BITS_TO_LONGS(nr_minors) * sizeof(*bitmap));
272 minors = bitmap;
273 nr_minors = BITS_TO_LONGS(end) * BITS_PER_LONG;
274 } else
275 old = bitmap;
276 spin_unlock(&minor_lock);
277 kfree(old);
278 }
279
280 spin_lock(&minor_lock);
281 if (find_next_bit(minors, end, minor) >= end) {
282 bitmap_set(minors, minor, nr);
283 rc = 0;
284 } else
285 rc = -EBUSY;
286 spin_unlock(&minor_lock);
287
288 return rc;
289 }
290
291 static void xlbd_release_minors(unsigned int minor, unsigned int nr)
292 {
293 unsigned int end = minor + nr;
294
295 BUG_ON(end > nr_minors);
296 spin_lock(&minor_lock);
297 bitmap_clear(minors, minor, nr);
298 spin_unlock(&minor_lock);
299 }
300
301 static void blkif_restart_queue_callback(void *arg)
302 {
303 struct blkfront_info *info = (struct blkfront_info *)arg;
304 schedule_work(&info->work);
305 }
306
307 static int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg)
308 {
309 /* We don't have real geometry info, but let's at least return
310 values consistent with the size of the device */
311 sector_t nsect = get_capacity(bd->bd_disk);
312 sector_t cylinders = nsect;
313
314 hg->heads = 0xff;
315 hg->sectors = 0x3f;
316 sector_div(cylinders, hg->heads * hg->sectors);
317 hg->cylinders = cylinders;
318 if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect)
319 hg->cylinders = 0xffff;
320 return 0;
321 }
322
323 static int blkif_ioctl(struct block_device *bdev, fmode_t mode,
324 unsigned command, unsigned long argument)
325 {
326 struct blkfront_info *info = bdev->bd_disk->private_data;
327 int i;
328
329 dev_dbg(&info->xbdev->dev, "command: 0x%x, argument: 0x%lx\n",
330 command, (long)argument);
331
332 switch (command) {
333 case CDROMMULTISESSION:
334 dev_dbg(&info->xbdev->dev, "FIXME: support multisession CDs later\n");
335 for (i = 0; i < sizeof(struct cdrom_multisession); i++)
336 if (put_user(0, (char __user *)(argument + i)))
337 return -EFAULT;
338 return 0;
339
340 case CDROM_GET_CAPABILITY: {
341 struct gendisk *gd = info->gd;
342 if (gd->flags & GENHD_FL_CD)
343 return 0;
344 return -EINVAL;
345 }
346
347 default:
348 /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
349 command);*/
350 return -EINVAL; /* same return as native Linux */
351 }
352
353 return 0;
354 }
355
356 /*
357 * Generate a Xen blkfront IO request from a blk layer request. Reads
358 * and writes are handled as expected.
359 *
360 * @req: a request struct
361 */
362 static int blkif_queue_request(struct request *req)
363 {
364 struct blkfront_info *info = req->rq_disk->private_data;
365 struct blkif_request *ring_req;
366 unsigned long id;
367 unsigned int fsect, lsect;
368 int i, ref;
369
370 /*
371 * Used to record whether we can queue the request using only
372 * existing persistent grants, or whether we have to get new grants
373 * because there are not enough free ones.
374 */
375 bool new_persistent_gnts;
376 grant_ref_t gref_head;
377 struct grant *gnt_list_entry = NULL;
378 struct scatterlist *sg;
379
380 if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
381 return 1;
382
383 /* Check if we have enough grants to allocate a request */
384 if (info->persistent_gnts_c < BLKIF_MAX_SEGMENTS_PER_REQUEST) {
385 new_persistent_gnts = 1;
386 if (gnttab_alloc_grant_references(
387 BLKIF_MAX_SEGMENTS_PER_REQUEST - info->persistent_gnts_c,
388 &gref_head) < 0) {
389 gnttab_request_free_callback(
390 &info->callback,
391 blkif_restart_queue_callback,
392 info,
393 BLKIF_MAX_SEGMENTS_PER_REQUEST);
394 return 1;
395 }
396 } else
397 new_persistent_gnts = 0;
398
399 /* Fill out a communications ring structure. */
400 ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
401 id = get_id_from_freelist(info);
402 info->shadow[id].request = req;
403
404 ring_req->u.rw.id = id;
405 ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req);
406 ring_req->u.rw.handle = info->handle;
407
408 ring_req->operation = rq_data_dir(req) ?
409 BLKIF_OP_WRITE : BLKIF_OP_READ;
410
411 if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) {
412 /*
413 * Ideally we can do an unordered flush-to-disk. In case the
414 * backend only supports barriers, use that. A barrier request is
415 * a superset of FUA, so we can implement it the same
416 * way. (It's also a FLUSH+FUA, since it is
417 * guaranteed ordered WRT previous writes.)
418 */
419 ring_req->operation = info->flush_op;
420 }
421
422 if (unlikely(req->cmd_flags & (REQ_DISCARD | REQ_SECURE))) {
423 /* id, sector_number and handle are set above. */
424 ring_req->operation = BLKIF_OP_DISCARD;
425 ring_req->u.discard.nr_sectors = blk_rq_sectors(req);
426 if ((req->cmd_flags & REQ_SECURE) && info->feature_secdiscard)
427 ring_req->u.discard.flag = BLKIF_DISCARD_SECURE;
428 else
429 ring_req->u.discard.flag = 0;
430 } else {
431 ring_req->u.rw.nr_segments = blk_rq_map_sg(req->q, req,
432 info->shadow[id].sg);
433 BUG_ON(ring_req->u.rw.nr_segments >
434 BLKIF_MAX_SEGMENTS_PER_REQUEST);
435
436 for_each_sg(info->shadow[id].sg, sg, ring_req->u.rw.nr_segments, i) {
437 fsect = sg->offset >> 9;
438 lsect = fsect + (sg->length >> 9) - 1;
439
440 gnt_list_entry = get_grant(&gref_head, page_to_pfn(sg_page(sg)), info);
441 ref = gnt_list_entry->gref;
442
443 info->shadow[id].grants_used[i] = gnt_list_entry;
444
445 if (rq_data_dir(req) && info->feature_persistent) {
446 char *bvec_data;
447 void *shared_data;
448
449 BUG_ON(sg->offset + sg->length > PAGE_SIZE);
450
451 shared_data = kmap_atomic(
452 pfn_to_page(gnt_list_entry->pfn));
453 bvec_data = kmap_atomic(sg_page(sg));
454
455 /*
456 * this does not wipe data stored outside the
457 * range sg->offset..sg->offset+sg->length.
458 * Therefore, blkback *could* see data from
459 * previous requests. This is OK as long as
460 * persistent grants are shared with just one
461 * domain. It may need refactoring if this
462 * changes
463 */
464 memcpy(shared_data + sg->offset,
465 bvec_data + sg->offset,
466 sg->length);
467
468 kunmap_atomic(bvec_data);
469 kunmap_atomic(shared_data);
470 }
471
472 ring_req->u.rw.seg[i] =
473 (struct blkif_request_segment) {
474 .gref = ref,
475 .first_sect = fsect,
476 .last_sect = lsect };
477 }
478 }
479
480 info->ring.req_prod_pvt++;
481
482 /* Keep a private copy so we can reissue requests when recovering. */
483 info->shadow[id].req = *ring_req;
484
485 if (new_persistent_gnts)
486 gnttab_free_grant_references(gref_head);
487
488 return 0;
489 }
490
491
492 static inline void flush_requests(struct blkfront_info *info)
493 {
494 int notify;
495
496 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);
497
498 if (notify)
499 notify_remote_via_irq(info->irq);
500 }
501
502 /*
503 * do_blkif_request
504 * read a block; request is in a request queue
505 */
506 static void do_blkif_request(struct request_queue *rq)
507 {
508 struct blkfront_info *info = NULL;
509 struct request *req;
510 int queued;
511
512 pr_debug("Entered do_blkif_request\n");
513
514 queued = 0;
515
516 while ((req = blk_peek_request(rq)) != NULL) {
517 info = req->rq_disk->private_data;
518
519 if (RING_FULL(&info->ring))
520 goto wait;
521
522 blk_start_request(req);
523
524 if ((req->cmd_type != REQ_TYPE_FS) ||
525 ((req->cmd_flags & (REQ_FLUSH | REQ_FUA)) &&
526 !info->flush_op)) {
527 __blk_end_request_all(req, -EIO);
528 continue;
529 }
530
531 pr_debug("do_blk_req %p: cmd %p, sec %lx, "
532 "(%u/%u) buffer:%p [%s]\n",
533 req, req->cmd, (unsigned long)blk_rq_pos(req),
534 blk_rq_cur_sectors(req), blk_rq_sectors(req),
535 req->buffer, rq_data_dir(req) ? "write" : "read");
536
537 if (blkif_queue_request(req)) {
538 blk_requeue_request(rq, req);
539 wait:
540 /* Avoid pointless unplugs. */
541 blk_stop_queue(rq);
542 break;
543 }
544
545 queued++;
546 }
547
548 if (queued != 0)
549 flush_requests(info);
550 }
551
552 static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
553 {
554 struct request_queue *rq;
555 struct blkfront_info *info = gd->private_data;
556
557 rq = blk_init_queue(do_blkif_request, &info->io_lock);
558 if (rq == NULL)
559 return -1;
560
561 queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
562
563 if (info->feature_discard) {
564 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq);
565 blk_queue_max_discard_sectors(rq, get_capacity(gd));
566 rq->limits.discard_granularity = info->discard_granularity;
567 rq->limits.discard_alignment = info->discard_alignment;
568 if (info->feature_secdiscard)
569 queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, rq);
570 }
571
572 /* Hard sector size and max sectors impersonate the equiv. hardware. */
573 blk_queue_logical_block_size(rq, sector_size);
574 blk_queue_max_hw_sectors(rq, 512);
575
576 /* Each segment in a request is up to an aligned page in size. */
577 blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
578 blk_queue_max_segment_size(rq, PAGE_SIZE);
579
580 /* Ensure a merged request will fit in a single I/O ring slot. */
581 blk_queue_max_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
582
583 /* Make sure buffer addresses are sector-aligned. */
584 blk_queue_dma_alignment(rq, 511);
585
586 /* Make sure we don't use bounce buffers. */
587 blk_queue_bounce_limit(rq, BLK_BOUNCE_ANY);
588
589 gd->queue = rq;
590
591 return 0;
592 }
593
594
595 static void xlvbd_flush(struct blkfront_info *info)
596 {
597 blk_queue_flush(info->rq, info->feature_flush);
598 printk(KERN_INFO "blkfront: %s: %s: %s %s\n",
599 info->gd->disk_name,
600 info->flush_op == BLKIF_OP_WRITE_BARRIER ?
601 "barrier" : (info->flush_op == BLKIF_OP_FLUSH_DISKCACHE ?
602 "flush diskcache" : "barrier or flush"),
603 info->feature_flush ? "enabled" : "disabled",
604 info->feature_persistent ? "using persistent grants" : "");
605 }
606
607 static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
608 {
609 int major;
610 major = BLKIF_MAJOR(vdevice);
611 *minor = BLKIF_MINOR(vdevice);
612 switch (major) {
613 case XEN_IDE0_MAJOR:
614 *offset = (*minor / 64) + EMULATED_HD_DISK_NAME_OFFSET;
615 *minor = ((*minor / 64) * PARTS_PER_DISK) +
616 EMULATED_HD_DISK_MINOR_OFFSET;
617 break;
618 case XEN_IDE1_MAJOR:
619 *offset = (*minor / 64) + 2 + EMULATED_HD_DISK_NAME_OFFSET;
620 *minor = (((*minor / 64) + 2) * PARTS_PER_DISK) +
621 EMULATED_HD_DISK_MINOR_OFFSET;
622 break;
623 case XEN_SCSI_DISK0_MAJOR:
624 *offset = (*minor / PARTS_PER_DISK) + EMULATED_SD_DISK_NAME_OFFSET;
625 *minor = *minor + EMULATED_SD_DISK_MINOR_OFFSET;
626 break;
627 case XEN_SCSI_DISK1_MAJOR:
628 case XEN_SCSI_DISK2_MAJOR:
629 case XEN_SCSI_DISK3_MAJOR:
630 case XEN_SCSI_DISK4_MAJOR:
631 case XEN_SCSI_DISK5_MAJOR:
632 case XEN_SCSI_DISK6_MAJOR:
633 case XEN_SCSI_DISK7_MAJOR:
634 *offset = (*minor / PARTS_PER_DISK) +
635 ((major - XEN_SCSI_DISK1_MAJOR + 1) * 16) +
636 EMULATED_SD_DISK_NAME_OFFSET;
637 *minor = *minor +
638 ((major - XEN_SCSI_DISK1_MAJOR + 1) * 16 * PARTS_PER_DISK) +
639 EMULATED_SD_DISK_MINOR_OFFSET;
640 break;
641 case XEN_SCSI_DISK8_MAJOR:
642 case XEN_SCSI_DISK9_MAJOR:
643 case XEN_SCSI_DISK10_MAJOR:
644 case XEN_SCSI_DISK11_MAJOR:
645 case XEN_SCSI_DISK12_MAJOR:
646 case XEN_SCSI_DISK13_MAJOR:
647 case XEN_SCSI_DISK14_MAJOR:
648 case XEN_SCSI_DISK15_MAJOR:
649 *offset = (*minor / PARTS_PER_DISK) +
650 ((major - XEN_SCSI_DISK8_MAJOR + 8) * 16) +
651 EMULATED_SD_DISK_NAME_OFFSET;
652 *minor = *minor +
653 ((major - XEN_SCSI_DISK8_MAJOR + 8) * 16 * PARTS_PER_DISK) +
654 EMULATED_SD_DISK_MINOR_OFFSET;
655 break;
656 case XENVBD_MAJOR:
657 *offset = *minor / PARTS_PER_DISK;
658 break;
659 default:
660 printk(KERN_WARNING "blkfront: your disk configuration is "
661 "incorrect, please use an xvd device instead\n");
662 return -ENODEV;
663 }
664 return 0;
665 }
666
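/*
 * Append the base-26 disk suffix for index 'n' (0 -> "a", 25 -> "z",
 * 26 -> "aa", ...), returning a pointer just past the last character
 * written.
 */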
667 static char *encode_disk_name(char *ptr, unsigned int n)
668 {
669 if (n >= 26)
670 ptr = encode_disk_name(ptr, n / 26 - 1);
671 *ptr = 'a' + n % 26;
672 return ptr + 1;
673 }
674
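/*
 * Create the gendisk for this vbd: translate the Xen vdevice id into a
 * name offset and minor range, reserve the minors, build the "xvd..."
 * name and set up the request queue.
 */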
675 static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
676 struct blkfront_info *info,
677 u16 vdisk_info, u16 sector_size)
678 {
679 struct gendisk *gd;
680 int nr_minors = 1;
681 int err;
682 unsigned int offset;
683 int minor;
684 int nr_parts;
685 char *ptr;
686
687 BUG_ON(info->gd != NULL);
688 BUG_ON(info->rq != NULL);
689
690 if ((info->vdevice>>EXT_SHIFT) > 1) {
691 /* this is above the extended range; something is wrong */
692 printk(KERN_WARNING "blkfront: vdevice 0x%x is above the extended range; ignoring\n", info->vdevice);
693 return -ENODEV;
694 }
695
696 if (!VDEV_IS_EXTENDED(info->vdevice)) {
697 err = xen_translate_vdev(info->vdevice, &minor, &offset);
698 if (err)
699 return err;
700 nr_parts = PARTS_PER_DISK;
701 } else {
702 minor = BLKIF_MINOR_EXT(info->vdevice);
703 nr_parts = PARTS_PER_EXT_DISK;
704 offset = minor / nr_parts;
705 if (xen_hvm_domain() && offset < EMULATED_HD_DISK_NAME_OFFSET + 4)
706 printk(KERN_WARNING "blkfront: vdevice 0x%x might conflict with "
707 "emulated IDE disks,\n\t choose an xvd device name"
708 "from xvde on\n", info->vdevice);
709 }
710 if (minor >> MINORBITS) {
711 pr_warn("blkfront: %#x's minor (%#x) out of range; ignoring\n",
712 info->vdevice, minor);
713 return -ENODEV;
714 }
715
716 if ((minor % nr_parts) == 0)
717 nr_minors = nr_parts;
718
719 err = xlbd_reserve_minors(minor, nr_minors);
720 if (err)
721 goto out;
722 err = -ENODEV;
723
724 gd = alloc_disk(nr_minors);
725 if (gd == NULL)
726 goto release;
727
728 strcpy(gd->disk_name, DEV_NAME);
729 ptr = encode_disk_name(gd->disk_name + sizeof(DEV_NAME) - 1, offset);
730 BUG_ON(ptr >= gd->disk_name + DISK_NAME_LEN);
731 if (nr_minors > 1)
732 *ptr = 0;
733 else
734 snprintf(ptr, gd->disk_name + DISK_NAME_LEN - ptr,
735 "%d", minor & (nr_parts - 1));
736
737 gd->major = XENVBD_MAJOR;
738 gd->first_minor = minor;
739 gd->fops = &xlvbd_block_fops;
740 gd->private_data = info;
741 gd->driverfs_dev = &(info->xbdev->dev);
742 set_capacity(gd, capacity);
743
744 if (xlvbd_init_blk_queue(gd, sector_size)) {
745 del_gendisk(gd);
746 goto release;
747 }
748
749 info->rq = gd->queue;
750 info->gd = gd;
751
752 xlvbd_flush(info);
753
754 if (vdisk_info & VDISK_READONLY)
755 set_disk_ro(gd, 1);
756
757 if (vdisk_info & VDISK_REMOVABLE)
758 gd->flags |= GENHD_FL_REMOVABLE;
759
760 if (vdisk_info & VDISK_CDROM)
761 gd->flags |= GENHD_FL_CD;
762
763 return 0;
764
765 release:
766 xlbd_release_minors(minor, nr_minors);
767 out:
768 return err;
769 }
770
771 static void xlvbd_release_gendisk(struct blkfront_info *info)
772 {
773 unsigned int minor, nr_minors;
774 unsigned long flags;
775
776 if (info->rq == NULL)
777 return;
778
779 spin_lock_irqsave(&info->io_lock, flags);
780
781 /* No more blkif_request(). */
782 blk_stop_queue(info->rq);
783
784 /* No more gnttab callback work. */
785 gnttab_cancel_free_callback(&info->callback);
786 spin_unlock_irqrestore(&info->io_lock, flags);
787
788 /* Flush gnttab callback work. Must be done with no locks held. */
789 flush_work(&info->work);
790
791 del_gendisk(info->gd);
792
793 minor = info->gd->first_minor;
794 nr_minors = info->gd->minors;
795 xlbd_release_minors(minor, nr_minors);
796
797 blk_cleanup_queue(info->rq);
798 info->rq = NULL;
799
800 put_disk(info->gd);
801 info->gd = NULL;
802 }
803
804 static void kick_pending_request_queues(struct blkfront_info *info)
805 {
806 if (!RING_FULL(&info->ring)) {
807 /* Re-enable calldowns. */
808 blk_start_queue(info->rq);
809 /* Kick things off immediately. */
810 do_blkif_request(info->rq);
811 }
812 }
813
814 static void blkif_restart_queue(struct work_struct *work)
815 {
816 struct blkfront_info *info = container_of(work, struct blkfront_info, work);
817
818 spin_lock_irq(&info->io_lock);
819 if (info->connected == BLKIF_STATE_CONNECTED)
820 kick_pending_request_queues(info);
821 spin_unlock_irq(&info->io_lock);
822 }
823
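/*
 * Tear down the connection to the backend: stop the request queue,
 * drop any grants still on info->grants, cancel and flush the gnttab
 * callback work, release the shared ring and unbind the interrupt.
 */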
824 static void blkif_free(struct blkfront_info *info, int suspend)
825 {
826 struct grant *persistent_gnt;
827 struct grant *n;
828
829 /* Prevent new requests being issued until we fix things up. */
830 spin_lock_irq(&info->io_lock);
831 info->connected = suspend ?
832 BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
833 /* No more blkif_request(). */
834 if (info->rq)
835 blk_stop_queue(info->rq);
836
837 /* Remove all persistent grants */
838 if (!list_empty(&info->grants)) {
839 list_for_each_entry_safe(persistent_gnt, n,
840 &info->grants, node) {
841 list_del(&persistent_gnt->node);
842 if (persistent_gnt->gref != GRANT_INVALID_REF) {
843 gnttab_end_foreign_access(persistent_gnt->gref,
844 0, 0UL);
845 info->persistent_gnts_c--;
846 }
847 if (info->feature_persistent)
848 __free_page(pfn_to_page(persistent_gnt->pfn));
849 kfree(persistent_gnt);
850 }
851 }
852 BUG_ON(info->persistent_gnts_c != 0);
853
854 /* No more gnttab callback work. */
855 gnttab_cancel_free_callback(&info->callback);
856 spin_unlock_irq(&info->io_lock);
857
858 /* Flush gnttab callback work. Must be done with no locks held. */
859 flush_work(&info->work);
860
861 /* Free resources associated with old device channel. */
862 if (info->ring_ref != GRANT_INVALID_REF) {
863 gnttab_end_foreign_access(info->ring_ref, 0,
864 (unsigned long)info->ring.sring);
865 info->ring_ref = GRANT_INVALID_REF;
866 info->ring.sring = NULL;
867 }
868 if (info->irq)
869 unbind_from_irqhandler(info->irq, info);
870 info->evtchn = info->irq = 0;
871
872 }
873
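/*
 * Post-process a completed request: for persistent-grant reads, copy
 * the data out of the shared pages into the request's pages, then
 * return every grant used by the request to info->grants.
 */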
874 static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
875 struct blkif_response *bret)
876 {
877 int i = 0;
878 struct scatterlist *sg;
879 char *bvec_data;
880 void *shared_data;
881 int nseg;
882
883 nseg = s->req.u.rw.nr_segments;
884
885 if (bret->operation == BLKIF_OP_READ && info->feature_persistent) {
886 /*
887 * Copy the data received from the backend into the bvec.
888 * Since bv_offset can be different than 0, and bv_len different
889 * than PAGE_SIZE, we have to keep track of the current offset,
890 * to be sure we are copying the data from the right shared page.
891 */
892 for_each_sg(s->sg, sg, nseg, i) {
893 BUG_ON(sg->offset + sg->length > PAGE_SIZE);
894 shared_data = kmap_atomic(
895 pfn_to_page(s->grants_used[i]->pfn));
896 bvec_data = kmap_atomic(sg_page(sg));
897 memcpy(bvec_data + sg->offset,
898 shared_data + sg->offset,
899 sg->length);
900 kunmap_atomic(bvec_data);
901 kunmap_atomic(shared_data);
902 }
903 }
904 /* Add the persistent grant into the list of free grants */
905 for (i = 0; i < nseg; i++) {
906 if (gnttab_query_foreign_access(s->grants_used[i]->gref)) {
907 /*
908 * If the grant is still mapped by the backend (the
909 * backend has chosen to make this grant persistent)
910 * we add it at the head of the list, so it will be
911 * reused first.
912 */
913 if (!info->feature_persistent)
914 pr_alert_ratelimited("backend has not unmapped grant: %u\n",
915 s->grants_used[i]->gref);
916 list_add(&s->grants_used[i]->node, &info->grants);
917 info->persistent_gnts_c++;
918 } else {
919 /*
920 * If the grant is not mapped by the backend we end the
921 * foreign access and add it to the tail of the list,
922 * so it will not be picked again unless we run out of
923 * persistent grants.
924 */
925 gnttab_end_foreign_access(s->grants_used[i]->gref, 0, 0UL);
926 s->grants_used[i]->gref = GRANT_INVALID_REF;
927 list_add_tail(&s->grants_used[i]->node, &info->grants);
928 }
929 }
930 }
931
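/*
 * Interrupt handler: walk the response ring, recycle each shadow id,
 * translate the backend status into an errno and complete the original
 * block layer request.
 */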
932 static irqreturn_t blkif_interrupt(int irq, void *dev_id)
933 {
934 struct request *req;
935 struct blkif_response *bret;
936 RING_IDX i, rp;
937 unsigned long flags;
938 struct blkfront_info *info = (struct blkfront_info *)dev_id;
939 int error;
940
941 spin_lock_irqsave(&info->io_lock, flags);
942
943 if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
944 spin_unlock_irqrestore(&info->io_lock, flags);
945 return IRQ_HANDLED;
946 }
947
948 again:
949 rp = info->ring.sring->rsp_prod;
950 rmb(); /* Ensure we see queued responses up to 'rp'. */
951
952 for (i = info->ring.rsp_cons; i != rp; i++) {
953 unsigned long id;
954
955 bret = RING_GET_RESPONSE(&info->ring, i);
956 id = bret->id;
957 /*
958 * The backend has messed up and given us an id that we would
959 * never have given to it (we stamp it up to BLK_RING_SIZE -
960 * look in get_id_from_freelist).
961 */
962 if (id >= BLK_RING_SIZE) {
963 WARN(1, "%s: response to %s has incorrect id (%ld)\n",
964 info->gd->disk_name, op_name(bret->operation), id);
965 /* We can't safely get the 'struct request' as
966 * the id is busted. */
967 continue;
968 }
969 req = info->shadow[id].request;
970
971 if (bret->operation != BLKIF_OP_DISCARD)
972 blkif_completion(&info->shadow[id], info, bret);
973
974 if (add_id_to_freelist(info, id)) {
975 WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n",
976 info->gd->disk_name, op_name(bret->operation), id);
977 continue;
978 }
979
980 error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
981 switch (bret->operation) {
982 case BLKIF_OP_DISCARD:
983 if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
984 struct request_queue *rq = info->rq;
985 printk(KERN_WARNING "blkfront: %s: %s op failed\n",
986 info->gd->disk_name, op_name(bret->operation));
987 error = -EOPNOTSUPP;
988 info->feature_discard = 0;
989 info->feature_secdiscard = 0;
990 queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
991 queue_flag_clear(QUEUE_FLAG_SECDISCARD, rq);
992 }
993 __blk_end_request_all(req, error);
994 break;
995 case BLKIF_OP_FLUSH_DISKCACHE:
996 case BLKIF_OP_WRITE_BARRIER:
997 if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
998 printk(KERN_WARNING "blkfront: %s: %s op failed\n",
999 info->gd->disk_name, op_name(bret->operation));
1000 error = -EOPNOTSUPP;
1001 }
1002 if (unlikely(bret->status == BLKIF_RSP_ERROR &&
1003 info->shadow[id].req.u.rw.nr_segments == 0)) {
1004 printk(KERN_WARNING "blkfront: %s: empty %s op failed\n",
1005 info->gd->disk_name, op_name(bret->operation));
1006 error = -EOPNOTSUPP;
1007 }
1008 if (unlikely(error)) {
1009 if (error == -EOPNOTSUPP)
1010 error = 0;
1011 info->feature_flush = 0;
1012 info->flush_op = 0;
1013 xlvbd_flush(info);
1014 }
1015 /* fall through */
1016 case BLKIF_OP_READ:
1017 case BLKIF_OP_WRITE:
1018 if (unlikely(bret->status != BLKIF_RSP_OKAY))
1019 dev_dbg(&info->xbdev->dev, "Bad return from blkdev data "
1020 "request: %x\n", bret->status);
1021
1022 __blk_end_request_all(req, error);
1023 break;
1024 default:
1025 BUG();
1026 }
1027 }
1028
1029 info->ring.rsp_cons = i;
1030
1031 if (i != info->ring.req_prod_pvt) {
1032 int more_to_do;
1033 RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
1034 if (more_to_do)
1035 goto again;
1036 } else
1037 info->ring.sring->rsp_event = i + 1;
1038
1039 kick_pending_request_queues(info);
1040
1041 spin_unlock_irqrestore(&info->io_lock, flags);
1042
1043 return IRQ_HANDLED;
1044 }
1045
1046
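/*
 * Allocate and grant the shared ring page, then allocate an event
 * channel and bind blkif_interrupt() to it.
 */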
1047 static int setup_blkring(struct xenbus_device *dev,
1048 struct blkfront_info *info)
1049 {
1050 struct blkif_sring *sring;
1051 int err, i;
1052
1053 info->ring_ref = GRANT_INVALID_REF;
1054
1055 sring = (struct blkif_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH);
1056 if (!sring) {
1057 xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
1058 return -ENOMEM;
1059 }
1060 SHARED_RING_INIT(sring);
1061 FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
1062
1063 for (i = 0; i < BLK_RING_SIZE; i++)
1064 sg_init_table(info->shadow[i].sg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
1065
1066 err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
1067 if (err < 0) {
1068 free_page((unsigned long)sring);
1069 info->ring.sring = NULL;
1070 goto fail;
1071 }
1072 info->ring_ref = err;
1073
1074 err = xenbus_alloc_evtchn(dev, &info->evtchn);
1075 if (err)
1076 goto fail;
1077
1078 err = bind_evtchn_to_irqhandler(info->evtchn, blkif_interrupt, 0,
1079 "blkif", info);
1080 if (err <= 0) {
1081 xenbus_dev_fatal(dev, err,
1082 "bind_evtchn_to_irqhandler failed");
1083 goto fail;
1084 }
1085 info->irq = err;
1086
1087 return 0;
1088 fail:
1089 blkif_free(info, 0);
1090 return err;
1091 }
1092
1093
1094 /* Common code used when first setting up, and when resuming. */
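/*
 * Within a single xenbus transaction this writes the frontend keys the
 * backend needs, e.g. (illustrative paths, the actual nodename depends
 * on the device):
 *
 *   device/vbd/51712/ring-ref           = "<grant ref>"
 *   device/vbd/51712/event-channel      = "<port>"
 *   device/vbd/51712/protocol           = XEN_IO_PROTO_ABI_NATIVE
 *   device/vbd/51712/feature-persistent = "1"
 */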
1095 static int talk_to_blkback(struct xenbus_device *dev,
1096 struct blkfront_info *info)
1097 {
1098 const char *message = NULL;
1099 struct xenbus_transaction xbt;
1100 int err;
1101
1102 /* Create shared ring, alloc event channel. */
1103 err = setup_blkring(dev, info);
1104 if (err)
1105 goto out;
1106
1107 again:
1108 err = xenbus_transaction_start(&xbt);
1109 if (err) {
1110 xenbus_dev_fatal(dev, err, "starting transaction");
1111 goto destroy_blkring;
1112 }
1113
1114 err = xenbus_printf(xbt, dev->nodename,
1115 "ring-ref", "%u", info->ring_ref);
1116 if (err) {
1117 message = "writing ring-ref";
1118 goto abort_transaction;
1119 }
1120 err = xenbus_printf(xbt, dev->nodename,
1121 "event-channel", "%u", info->evtchn);
1122 if (err) {
1123 message = "writing event-channel";
1124 goto abort_transaction;
1125 }
1126 err = xenbus_printf(xbt, dev->nodename, "protocol", "%s",
1127 XEN_IO_PROTO_ABI_NATIVE);
1128 if (err) {
1129 message = "writing protocol";
1130 goto abort_transaction;
1131 }
1132 err = xenbus_printf(xbt, dev->nodename,
1133 "feature-persistent", "%u", 1);
1134 if (err)
1135 dev_warn(&dev->dev,
1136 "writing persistent grants feature to xenbus");
1137
1138 err = xenbus_transaction_end(xbt, 0);
1139 if (err) {
1140 if (err == -EAGAIN)
1141 goto again;
1142 xenbus_dev_fatal(dev, err, "completing transaction");
1143 goto destroy_blkring;
1144 }
1145
1146 xenbus_switch_state(dev, XenbusStateInitialised);
1147
1148 return 0;
1149
1150 abort_transaction:
1151 xenbus_transaction_end(xbt, 1);
1152 if (message)
1153 xenbus_dev_fatal(dev, err, "%s", message);
1154 destroy_blkring:
1155 blkif_free(info, 0);
1156 out:
1157 return err;
1158 }
1159
1160 /**
1161 * Entry point to this code when a new device is created. Allocate the basic
1162 * structures and the ring buffer for communication with the backend, and
1163 * inform the backend of the appropriate details for those. Switch to
1164 * Initialised state.
1165 */
1166 static int blkfront_probe(struct xenbus_device *dev,
1167 const struct xenbus_device_id *id)
1168 {
1169 int err, vdevice, i;
1170 struct blkfront_info *info;
1171
1172 /* FIXME: Use dynamic device id if this is not set. */
1173 err = xenbus_scanf(XBT_NIL, dev->nodename,
1174 "virtual-device", "%i", &vdevice);
1175 if (err != 1) {
1176 /* go looking in the extended area instead */
1177 err = xenbus_scanf(XBT_NIL, dev->nodename, "virtual-device-ext",
1178 "%i", &vdevice);
1179 if (err != 1) {
1180 xenbus_dev_fatal(dev, err, "reading virtual-device");
1181 return err;
1182 }
1183 }
1184
1185 if (xen_hvm_domain()) {
1186 char *type;
1187 int len;
1188 /* no unplug has been done: do not hook devices != xen vbds */
1189 if (xen_platform_pci_unplug & XEN_UNPLUG_UNNECESSARY) {
1190 int major;
1191
1192 if (!VDEV_IS_EXTENDED(vdevice))
1193 major = BLKIF_MAJOR(vdevice);
1194 else
1195 major = XENVBD_MAJOR;
1196
1197 if (major != XENVBD_MAJOR) {
1198 printk(KERN_INFO
1199 "%s: HVM does not support vbd %d as xen block device\n",
1200 __FUNCTION__, vdevice);
1201 return -ENODEV;
1202 }
1203 }
1204 /* do not create a PV cdrom device if we are an HVM guest */
1205 type = xenbus_read(XBT_NIL, dev->nodename, "device-type", &len);
1206 if (IS_ERR(type))
1207 return -ENODEV;
1208 if (strncmp(type, "cdrom", 5) == 0) {
1209 kfree(type);
1210 return -ENODEV;
1211 }
1212 kfree(type);
1213 }
1214 info = kzalloc(sizeof(*info), GFP_KERNEL);
1215 if (!info) {
1216 xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
1217 return -ENOMEM;
1218 }
1219
1220 mutex_init(&info->mutex);
1221 spin_lock_init(&info->io_lock);
1222 info->xbdev = dev;
1223 info->vdevice = vdevice;
1224 INIT_LIST_HEAD(&info->grants);
1225 info->persistent_gnts_c = 0;
1226 info->connected = BLKIF_STATE_DISCONNECTED;
1227 INIT_WORK(&info->work, blkif_restart_queue);
1228
1229 for (i = 0; i < BLK_RING_SIZE; i++)
1230 info->shadow[i].req.u.rw.id = i+1;
1231 info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
1232
1233 /* Front end dir is a number, which is used as the id. */
1234 info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
1235 dev_set_drvdata(&dev->dev, info);
1236
1237 err = talk_to_blkback(dev, info);
1238 if (err) {
1239 kfree(info);
1240 dev_set_drvdata(&dev->dev, NULL);
1241 return err;
1242 }
1243
1244 return 0;
1245 }
1246
1247
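/*
 * Called after a resume: rebuild the shadow free list from a saved
 * copy, re-grant any segments of requests that were still in flight
 * and put those requests back on the new ring.
 */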
1248 static int blkif_recover(struct blkfront_info *info)
1249 {
1250 int i;
1251 struct blkif_request *req;
1252 struct blk_shadow *copy;
1253 unsigned int persistent;
1254 int j, rc;
1255
1256 /* Stage 1: Make a safe copy of the shadow state. */
1257 copy = kmemdup(info->shadow, sizeof(info->shadow),
1258 GFP_NOIO | __GFP_REPEAT | __GFP_HIGH);
1259 if (!copy)
1260 return -ENOMEM;
1261
1262 /* Stage 2: Set up free list. */
1263 memset(&info->shadow, 0, sizeof(info->shadow));
1264 for (i = 0; i < BLK_RING_SIZE; i++)
1265 info->shadow[i].req.u.rw.id = i+1;
1266 info->shadow_free = info->ring.req_prod_pvt;
1267 info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
1268
1269 /* Check if the backend supports persistent grants */
1270 rc = xenbus_gather(XBT_NIL, info->xbdev->otherend,
1271 "feature-persistent", "%u", &persistent,
1272 NULL);
1273 if (rc)
1274 info->feature_persistent = 0;
1275 else
1276 info->feature_persistent = persistent;
1277
1278 /* Allocate memory for grants */
1279 rc = fill_grant_buffer(info, BLK_RING_SIZE *
1280 BLKIF_MAX_SEGMENTS_PER_REQUEST);
1281 if (rc) {
1282 xenbus_dev_fatal(info->xbdev, rc, "setting grant buffer failed");
1283 kfree(copy);
1284 return rc;
1285 }
1286
1287 /* Stage 3: Find pending requests and requeue them. */
1288 for (i = 0; i < BLK_RING_SIZE; i++) {
1289 /* Not in use? */
1290 if (!copy[i].request)
1291 continue;
1292
1293 /* Grab a request slot and copy shadow state into it. */
1294 req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
1295 *req = copy[i].req;
1296
1297 /* We get a new request id, and must reset the shadow state. */
1298 req->u.rw.id = get_id_from_freelist(info);
1299 memcpy(&info->shadow[req->u.rw.id], &copy[i], sizeof(copy[i]));
1300
1301 if (req->operation != BLKIF_OP_DISCARD) {
1302 /* Rewrite any grant references invalidated by susp/resume. */
1303 for (j = 0; j < req->u.rw.nr_segments; j++)
1304 gnttab_grant_foreign_access_ref(
1305 req->u.rw.seg[j].gref,
1306 info->xbdev->otherend_id,
1307 pfn_to_mfn(copy[i].grants_used[j]->pfn),
1308 0);
1309 }
1310 info->shadow[req->u.rw.id].req = *req;
1311
1312 info->ring.req_prod_pvt++;
1313 }
1314
1315 kfree(copy);
1316
1317 xenbus_switch_state(info->xbdev, XenbusStateConnected);
1318
1319 spin_lock_irq(&info->io_lock);
1320
1321 /* Now safe for us to use the shared ring */
1322 info->connected = BLKIF_STATE_CONNECTED;
1323
1324 /* Send off requeued requests */
1325 flush_requests(info);
1326
1327 /* Kick any other new requests queued since we resumed */
1328 kick_pending_request_queues(info);
1329
1330 spin_unlock_irq(&info->io_lock);
1331
1332 return 0;
1333 }
1334
1335 /**
1336 * We are reconnecting to the backend, due to a suspend/resume, or a backend
1337 * driver restart. We tear down our blkif structure and recreate it, but
1338 * leave the device-layer structures intact so that this is transparent to the
1339 * rest of the kernel.
1340 */
1341 static int blkfront_resume(struct xenbus_device *dev)
1342 {
1343 struct blkfront_info *info = dev_get_drvdata(&dev->dev);
1344 int err;
1345
1346 dev_dbg(&dev->dev, "blkfront_resume: %s\n", dev->nodename);
1347
1348 blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);
1349
1350 err = talk_to_blkback(dev, info);
1351
1352 /*
1353 * We have to wait for the backend to switch to
1354 * connected state, since we want to read which
1355 * features it supports.
1356 */
1357
1358 return err;
1359 }
1360
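/*
 * Handle a Closing request from the backend.  If the block device is
 * still open we only switch to XenbusStateClosing and let
 * blkif_release() finish the teardown; otherwise release the gendisk
 * and complete the frontend close.
 */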
1361 static void
1362 blkfront_closing(struct blkfront_info *info)
1363 {
1364 struct xenbus_device *xbdev = info->xbdev;
1365 struct block_device *bdev = NULL;
1366
1367 mutex_lock(&info->mutex);
1368
1369 if (xbdev->state == XenbusStateClosing) {
1370 mutex_unlock(&info->mutex);
1371 return;
1372 }
1373
1374 if (info->gd)
1375 bdev = bdget_disk(info->gd, 0);
1376
1377 mutex_unlock(&info->mutex);
1378
1379 if (!bdev) {
1380 xenbus_frontend_closed(xbdev);
1381 return;
1382 }
1383
1384 mutex_lock(&bdev->bd_mutex);
1385
1386 if (bdev->bd_openers) {
1387 xenbus_dev_error(xbdev, -EBUSY,
1388 "Device in use; refusing to close");
1389 xenbus_switch_state(xbdev, XenbusStateClosing);
1390 } else {
1391 xlvbd_release_gendisk(info);
1392 xenbus_frontend_closed(xbdev);
1393 }
1394
1395 mutex_unlock(&bdev->bd_mutex);
1396 bdput(bdev);
1397 }
1398
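/*
 * Read the backend's discard parameters (discard-granularity,
 * discard-alignment, discard-secure) from xenstore and record them in
 * the blkfront_info.
 */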
1399 static void blkfront_setup_discard(struct blkfront_info *info)
1400 {
1401 int err;
1402 char *type;
1403 unsigned int discard_granularity;
1404 unsigned int discard_alignment;
1405 unsigned int discard_secure;
1406
1407 type = xenbus_read(XBT_NIL, info->xbdev->otherend, "type", NULL);
1408 if (IS_ERR(type))
1409 return;
1410
1411 info->feature_secdiscard = 0;
1412 if (strncmp(type, "phy", 3) == 0) {
1413 err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
1414 "discard-granularity", "%u", &discard_granularity,
1415 "discard-alignment", "%u", &discard_alignment,
1416 NULL);
1417 if (!err) {
1418 info->feature_discard = 1;
1419 info->discard_granularity = discard_granularity;
1420 info->discard_alignment = discard_alignment;
1421 }
1422 err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
1423 "discard-secure", "%d", &discard_secure,
1424 NULL);
1425 if (!err)
1426 info->feature_secdiscard = discard_secure;
1427
1428 } else if (strncmp(type, "file", 4) == 0)
1429 info->feature_discard = 1;
1430
1431 kfree(type);
1432 }
1433
1434 /*
1435 * Invoked when the backend is finally 'ready' (and has told us
1436 * the details about the physical device - #sectors, size, etc).
1437 */
1438 static void blkfront_connect(struct blkfront_info *info)
1439 {
1440 unsigned long long sectors;
1441 unsigned long sector_size;
1442 unsigned int binfo;
1443 int err;
1444 int barrier, flush, discard, persistent;
1445
1446 switch (info->connected) {
1447 case BLKIF_STATE_CONNECTED:
1448 /*
1449 * Potentially, the back-end may be signalling
1450 * a capacity change; update the capacity.
1451 */
1452 err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
1453 "sectors", "%Lu", &sectors);
1454 if (XENBUS_EXIST_ERR(err))
1455 return;
1456 printk(KERN_INFO "Setting capacity to %Lu\n",
1457 sectors);
1458 set_capacity(info->gd, sectors);
1459 revalidate_disk(info->gd);
1460 return;
1461
1462 case BLKIF_STATE_SUSPENDED:
1463 /*
1464 * If we are recovering from suspension, we need to wait
1465 * for the backend to announce its features before
1466 * reconnecting; we need to know if the backend supports
1467 * persistent grants.
1468 */
1469 blkif_recover(info);
1470 return;
1471
1472 default:
1473 break;
1474 }
1475
1476 dev_dbg(&info->xbdev->dev, "%s:%s.\n",
1477 __func__, info->xbdev->otherend);
1478
1479 err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
1480 "sectors", "%llu", &sectors,
1481 "info", "%u", &binfo,
1482 "sector-size", "%lu", &sector_size,
1483 NULL);
1484 if (err) {
1485 xenbus_dev_fatal(info->xbdev, err,
1486 "reading backend fields at %s",
1487 info->xbdev->otherend);
1488 return;
1489 }
1490
1491 info->feature_flush = 0;
1492 info->flush_op = 0;
1493
1494 err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
1495 "feature-barrier", "%d", &barrier,
1496 NULL);
1497
1498 /*
1499 * If there's no "feature-barrier" defined, then it means
1500 * we're dealing with a very old backend which writes
1501 * synchronously; nothing to do.
1502 *
1503 * If there are barriers, then we use flush.
1504 */
1505 if (!err && barrier) {
1506 info->feature_flush = REQ_FLUSH | REQ_FUA;
1507 info->flush_op = BLKIF_OP_WRITE_BARRIER;
1508 }
1509 /*
1510 * And if there is "feature-flush-cache", use that in preference
1511 * to barriers.
1512 */
1513 err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
1514 "feature-flush-cache", "%d", &flush,
1515 NULL);
1516
1517 if (!err && flush) {
1518 info->feature_flush = REQ_FLUSH;
1519 info->flush_op = BLKIF_OP_FLUSH_DISKCACHE;
1520 }
1521
1522 err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
1523 "feature-discard", "%d", &discard,
1524 NULL);
1525
1526 if (!err && discard)
1527 blkfront_setup_discard(info);
1528
1529 err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
1530 "feature-persistent", "%u", &persistent,
1531 NULL);
1532 if (err)
1533 info->feature_persistent = 0;
1534 else
1535 info->feature_persistent = persistent;
1536
1537 /* Allocate memory for grants */
1538 err = fill_grant_buffer(info, BLK_RING_SIZE *
1539 BLKIF_MAX_SEGMENTS_PER_REQUEST);
1540 if (err) {
1541 xenbus_dev_fatal(info->xbdev, err, "setting grant buffer failed");
1542 return;
1543 }
1544
1545 err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
1546 if (err) {
1547 xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
1548 info->xbdev->otherend);
1549 return;
1550 }
1551
1552 xenbus_switch_state(info->xbdev, XenbusStateConnected);
1553
1554 /* Kick pending requests. */
1555 spin_lock_irq(&info->io_lock);
1556 info->connected = BLKIF_STATE_CONNECTED;
1557 kick_pending_request_queues(info);
1558 spin_unlock_irq(&info->io_lock);
1559
1560 add_disk(info->gd);
1561
1562 info->is_ready = 1;
1563 }
1564
1565 /**
1566 * Callback received when the backend's state changes.
1567 */
1568 static void blkback_changed(struct xenbus_device *dev,
1569 enum xenbus_state backend_state)
1570 {
1571 struct blkfront_info *info = dev_get_drvdata(&dev->dev);
1572
1573 dev_dbg(&dev->dev, "blkfront:blkback_changed to state %d.\n", backend_state);
1574
1575 switch (backend_state) {
1576 case XenbusStateInitialising:
1577 case XenbusStateInitWait:
1578 case XenbusStateInitialised:
1579 case XenbusStateReconfiguring:
1580 case XenbusStateReconfigured:
1581 case XenbusStateUnknown:
1582 break;
1583
1584 case XenbusStateConnected:
1585 blkfront_connect(info);
1586 break;
1587
1588 case XenbusStateClosed:
1589 if (dev->state == XenbusStateClosed)
1590 break;
1591 /* Missed the backend's Closing state -- fallthrough */
1592 case XenbusStateClosing:
1593 if (info)
1594 blkfront_closing(info);
1595 break;
1596 }
1597 }
1598
1599 static int blkfront_remove(struct xenbus_device *xbdev)
1600 {
1601 struct blkfront_info *info = dev_get_drvdata(&xbdev->dev);
1602 struct block_device *bdev = NULL;
1603 struct gendisk *disk;
1604
1605 dev_dbg(&xbdev->dev, "%s removed", xbdev->nodename);
1606
1607 blkif_free(info, 0);
1608
1609 mutex_lock(&info->mutex);
1610
1611 disk = info->gd;
1612 if (disk)
1613 bdev = bdget_disk(disk, 0);
1614
1615 info->xbdev = NULL;
1616 mutex_unlock(&info->mutex);
1617
1618 if (!bdev) {
1619 kfree(info);
1620 return 0;
1621 }
1622
1623 /*
1624 * The xbdev was removed before we reached the Closed
1625 * state. See if it's safe to remove the disk. If the bdev
1626 * isn't closed yet, we let release take care of it.
1627 */
1628
1629 mutex_lock(&bdev->bd_mutex);
1630 info = disk->private_data;
1631
1632 dev_warn(disk_to_dev(disk),
1633 "%s was hot-unplugged, %d stale handles\n",
1634 xbdev->nodename, bdev->bd_openers);
1635
1636 if (info && !bdev->bd_openers) {
1637 xlvbd_release_gendisk(info);
1638 disk->private_data = NULL;
1639 kfree(info);
1640 }
1641
1642 mutex_unlock(&bdev->bd_mutex);
1643 bdput(bdev);
1644
1645 return 0;
1646 }
1647
1648 static int blkfront_is_ready(struct xenbus_device *dev)
1649 {
1650 struct blkfront_info *info = dev_get_drvdata(&dev->dev);
1651
1652 return info->is_ready && info->xbdev;
1653 }
1654
1655 static int blkif_open(struct block_device *bdev, fmode_t mode)
1656 {
1657 struct gendisk *disk = bdev->bd_disk;
1658 struct blkfront_info *info;
1659 int err = 0;
1660
1661 mutex_lock(&blkfront_mutex);
1662
1663 info = disk->private_data;
1664 if (!info) {
1665 /* xbdev gone */
1666 err = -ERESTARTSYS;
1667 goto out;
1668 }
1669
1670 mutex_lock(&info->mutex);
1671
1672 if (!info->gd)
1673 /* xbdev is closed */
1674 err = -ERESTARTSYS;
1675
1676 mutex_unlock(&info->mutex);
1677
1678 out:
1679 mutex_unlock(&blkfront_mutex);
1680 return err;
1681 }
1682
1683 static void blkif_release(struct gendisk *disk, fmode_t mode)
1684 {
1685 struct blkfront_info *info = disk->private_data;
1686 struct block_device *bdev;
1687 struct xenbus_device *xbdev;
1688
1689 mutex_lock(&blkfront_mutex);
1690
1691 bdev = bdget_disk(disk, 0);
1692
1693 if (bdev->bd_openers)
1694 goto out;
1695
1696 /*
1697 * Check if we have been instructed to close. We will have
1698 * deferred this request, because the bdev was still open.
1699 */
1700
1701 mutex_lock(&info->mutex);
1702 xbdev = info->xbdev;
1703
1704 if (xbdev && xbdev->state == XenbusStateClosing) {
1705 /* pending switch to state closed */
1706 dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n");
1707 xlvbd_release_gendisk(info);
1708 xenbus_frontend_closed(info->xbdev);
1709 }
1710
1711 mutex_unlock(&info->mutex);
1712
1713 if (!xbdev) {
1714 /* sudden device removal */
1715 dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n");
1716 xlvbd_release_gendisk(info);
1717 disk->private_data = NULL;
1718 kfree(info);
1719 }
1720
1721 out:
1722 bdput(bdev);
1723 mutex_unlock(&blkfront_mutex);
1724 }
1725
1726 static const struct block_device_operations xlvbd_block_fops =
1727 {
1728 .owner = THIS_MODULE,
1729 .open = blkif_open,
1730 .release = blkif_release,
1731 .getgeo = blkif_getgeo,
1732 .ioctl = blkif_ioctl,
1733 };
1734
1735
1736 static const struct xenbus_device_id blkfront_ids[] = {
1737 { "vbd" },
1738 { "" }
1739 };
1740
1741 static DEFINE_XENBUS_DRIVER(blkfront, ,
1742 .probe = blkfront_probe,
1743 .remove = blkfront_remove,
1744 .resume = blkfront_resume,
1745 .otherend_changed = blkback_changed,
1746 .is_ready = blkfront_is_ready,
1747 );
1748
1749 static int __init xlblk_init(void)
1750 {
1751 int ret;
1752
1753 if (!xen_domain())
1754 return -ENODEV;
1755
1756 if (xen_hvm_domain() && !xen_platform_pci_unplug)
1757 return -ENODEV;
1758
1759 if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) {
1760 printk(KERN_WARNING "xen_blk: can't get major %d with name %s\n",
1761 XENVBD_MAJOR, DEV_NAME);
1762 return -ENODEV;
1763 }
1764
1765 ret = xenbus_register_frontend(&blkfront_driver);
1766 if (ret) {
1767 unregister_blkdev(XENVBD_MAJOR, DEV_NAME);
1768 return ret;
1769 }
1770
1771 return 0;
1772 }
1773 module_init(xlblk_init);
1774
1775
1776 static void __exit xlblk_exit(void)
1777 {
1778 xenbus_unregister_driver(&blkfront_driver);
1779 unregister_blkdev(XENVBD_MAJOR, DEV_NAME);
1780 kfree(minors);
1781 }
1782 module_exit(xlblk_exit);
1783
1784 MODULE_DESCRIPTION("Xen virtual block device frontend");
1785 MODULE_LICENSE("GPL");
1786 MODULE_ALIAS_BLOCKDEV_MAJOR(XENVBD_MAJOR);
1787 MODULE_ALIAS("xen:vbd");
1788 MODULE_ALIAS("xenblk");