1 | /****************************************************************************** |
---|
2 | * gntdev.c |
---|
3 | * |
---|
4 | * Device for accessing (in user-space) pages that have been granted by other |
---|
5 | * domains. |
---|
6 | * |
---|
7 | * Copyright (c) 2006-2007, D G Murray. |
---|
8 | * |
---|
9 | * This program is distributed in the hope that it will be useful, |
---|
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
12 | * GNU General Public License for more details. |
---|
13 | * |
---|
14 | * You should have received a copy of the GNU General Public License |
---|
15 | * along with this program; if not, write to the Free Software |
---|
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
---|
17 | */ |
---|
18 | |
---|
19 | #include <asm/atomic.h> |
---|
20 | #include <linux/module.h> |
---|
21 | #include <linux/kernel.h> |
---|
22 | #include <linux/init.h> |
---|
23 | #include <linux/fs.h> |
---|
24 | #include <linux/device.h> |
---|
25 | #include <linux/mm.h> |
---|
26 | #include <linux/mman.h> |
---|
27 | #include <asm/uaccess.h> |
---|
28 | #include <asm/io.h> |
---|
29 | #include <xen/gnttab.h> |
---|
30 | #include <asm/hypervisor.h> |
---|
31 | #include <xen/balloon.h> |
---|
32 | #include <xen/evtchn.h> |
---|
33 | #include <xen/driver_util.h> |
---|
34 | |
---|
35 | #include <linux/types.h> |
---|
36 | #include <xen/public/gntdev.h> |
---|
37 | |
---|
38 | |
---|
39 | #define DRIVER_AUTHOR "Derek G. Murray <Derek.Murray@cl.cam.ac.uk>" |
---|
40 | #define DRIVER_DESC "User-space granted page access driver" |
---|
41 | |
---|
42 | MODULE_LICENSE("GPL"); |
---|
43 | MODULE_AUTHOR(DRIVER_AUTHOR); |
---|
44 | MODULE_DESCRIPTION(DRIVER_DESC); |
---|
45 | |
---|
46 | #define MAX_GRANTS 128 |
---|
47 | |
---|
/* Lifecycle of a slot in a file's grant array.
 *
 * GNTDEV_SLOT_INVALID (0):
 *	No grant reference is associated with the slot, so it may be claimed
 *	for a new grant reference.
 *
 * GNTDEV_SLOT_NOT_YET_MAPPED (1):
 *	A grant reference has been associated with the slot (via the
 *	IOCTL_GNTDEV_MAP_GRANT_REF ioctl), but the slot has not been
 *	mmap()-ed yet.
 *
 * GNTDEV_SLOT_MAPPED (2):
 *	A grant reference is associated with the slot and it has been
 *	mmap()-ed.
 */
typedef enum gntdev_slot_state {
	GNTDEV_SLOT_INVALID = 0,
	GNTDEV_SLOT_NOT_YET_MAPPED,
	GNTDEV_SLOT_MAPPED
} gntdev_slot_state_t;
66 | |
---|
/* Sentinel values. The expansions are parenthesised so that the unary minus
 * cannot combine with neighbouring operators at the point of use.
 */

/* Stored in a slot's user_handle while no user-space grant mapping exists. */
#define GNTDEV_INVALID_HANDLE (-1)

/* Stored in a free-list entry whose slot has been taken by the contiguous
 * allocator but which has not yet been squeezed out of the list.
 */
#define GNTDEV_FREE_LIST_INVALID (-1)
69 | /* Each opened instance of gntdev is associated with a list of grants, |
---|
70 | * represented by an array of elements of the following type, |
---|
71 | * gntdev_grant_info_t. |
---|
72 | */ |
---|
73 | typedef struct gntdev_grant_info { |
---|
74 | gntdev_slot_state_t state; |
---|
75 | union { |
---|
76 | uint32_t free_list_index; |
---|
77 | struct { |
---|
78 | domid_t domid; |
---|
79 | grant_ref_t ref; |
---|
80 | grant_handle_t kernel_handle; |
---|
81 | grant_handle_t user_handle; |
---|
82 | uint64_t dev_bus_addr; |
---|
83 | } valid; |
---|
84 | } u; |
---|
85 | } gntdev_grant_info_t; |
---|
86 | |
---|
87 | /* Private data structure, which is stored in the file pointer for files |
---|
88 | * associated with this device. |
---|
89 | */ |
---|
90 | typedef struct gntdev_file_private_data { |
---|
91 | |
---|
92 | /* Array of grant information. */ |
---|
93 | gntdev_grant_info_t grants[MAX_GRANTS]; |
---|
94 | |
---|
95 | /* Read/write semaphore used to protect the grants array. */ |
---|
96 | struct rw_semaphore grants_sem; |
---|
97 | |
---|
98 | /* An array of indices of free slots in the grants array. |
---|
99 | * N.B. An entry in this list may temporarily have the value |
---|
100 | * GNTDEV_FREE_LIST_INVALID if the corresponding slot has been removed |
---|
101 | * from the list by the contiguous allocator, but the list has not yet |
---|
102 | * been compressed. However, this is not visible across invocations of |
---|
103 | * the device. |
---|
104 | */ |
---|
105 | int32_t free_list[MAX_GRANTS]; |
---|
106 | |
---|
107 | /* The number of free slots in the grants array. */ |
---|
108 | uint32_t free_list_size; |
---|
109 | |
---|
110 | /* Read/write semaphore used to protect the free list. */ |
---|
111 | struct rw_semaphore free_list_sem; |
---|
112 | |
---|
113 | /* Index of the next slot after the most recent contiguous allocation, |
---|
114 | * for use in a next-fit allocator. |
---|
115 | */ |
---|
116 | uint32_t next_fit_index; |
---|
117 | |
---|
118 | /* Used to map grants into the kernel, before mapping them into user |
---|
119 | * space. |
---|
120 | */ |
---|
121 | struct page **foreign_pages; |
---|
122 | |
---|
123 | } gntdev_file_private_data_t; |
---|
124 | |
---|
125 | /* Module lifecycle operations. */ |
---|
126 | static int __init gntdev_init(void); |
---|
127 | static void __exit gntdev_exit(void); |
---|
128 | |
---|
129 | module_init(gntdev_init); |
---|
130 | module_exit(gntdev_exit); |
---|
131 | |
---|
132 | /* File operations. */ |
---|
133 | static int gntdev_open(struct inode *inode, struct file *flip); |
---|
134 | static int gntdev_release(struct inode *inode, struct file *flip); |
---|
135 | static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma); |
---|
136 | static int gntdev_ioctl (struct inode *inode, struct file *flip, |
---|
137 | unsigned int cmd, unsigned long arg); |
---|
138 | |
---|
139 | static struct file_operations gntdev_fops = { |
---|
140 | .owner = THIS_MODULE, |
---|
141 | .open = gntdev_open, |
---|
142 | .release = gntdev_release, |
---|
143 | .mmap = gntdev_mmap, |
---|
144 | .ioctl = gntdev_ioctl |
---|
145 | }; |
---|
146 | |
---|
147 | /* VM operations. */ |
---|
148 | static void gntdev_vma_close(struct vm_area_struct *vma); |
---|
149 | static pte_t gntdev_clear_pte(struct vm_area_struct *vma, unsigned long addr, |
---|
150 | pte_t *ptep, int is_fullmm); |
---|
151 | |
---|
152 | static struct vm_operations_struct gntdev_vmops = { |
---|
153 | .close = gntdev_vma_close, |
---|
154 | .zap_pte = gntdev_clear_pte |
---|
155 | }; |
---|
156 | |
---|
/* Global variables. */

/* Major number returned by register_chrdev(); kept so that the driver can be
 * unregistered again on unload.
 */
static int gntdev_major;

/* Name under which the character device is registered. */
#define GNTDEV_NAME "gntdev"
163 | |
---|
164 | /* Memory mapping functions |
---|
165 | * ------------------------ |
---|
166 | * |
---|
167 | * Every granted page is mapped into both kernel and user space, and the two |
---|
168 | * following functions return the respective virtual addresses of these pages. |
---|
169 | * |
---|
170 | * When shadow paging is disabled, the granted page is mapped directly into |
---|
171 | * user space; when it is enabled, it is mapped into the kernel and remapped |
---|
172 | * into user space using vm_insert_page() (see gntdev_mmap(), below). |
---|
173 | */ |
---|
174 | |
---|
175 | /* Returns the virtual address (in user space) of the @page_index'th page |
---|
176 | * in the given VM area. |
---|
177 | */ |
---|
178 | static inline unsigned long get_user_vaddr (struct vm_area_struct *vma, |
---|
179 | int page_index) |
---|
180 | { |
---|
181 | return (unsigned long) vma->vm_start + (page_index << PAGE_SHIFT); |
---|
182 | } |
---|
183 | |
---|
184 | /* Returns the virtual address (in kernel space) of the @slot_index'th page |
---|
185 | * mapped by the gntdev instance that owns the given private data struct. |
---|
186 | */ |
---|
187 | static inline unsigned long get_kernel_vaddr (gntdev_file_private_data_t *priv, |
---|
188 | int slot_index) |
---|
189 | { |
---|
190 | unsigned long pfn; |
---|
191 | void *kaddr; |
---|
192 | pfn = page_to_pfn(priv->foreign_pages[slot_index]); |
---|
193 | kaddr = pfn_to_kaddr(pfn); |
---|
194 | return (unsigned long) kaddr; |
---|
195 | } |
---|
196 | |
---|
197 | /* Helper functions. */ |
---|
198 | |
---|
199 | /* Adds information about a grant reference to the list of grants in the file's |
---|
200 | * private data structure. Returns non-zero on failure. On success, sets the |
---|
201 | * value of *offset to the offset that should be mmap()-ed in order to map the |
---|
202 | * grant reference. |
---|
203 | */ |
---|
204 | static int add_grant_reference(struct file *flip, |
---|
205 | struct ioctl_gntdev_grant_ref *op, |
---|
206 | uint64_t *offset) |
---|
207 | { |
---|
208 | gntdev_file_private_data_t *private_data |
---|
209 | = (gntdev_file_private_data_t *) flip->private_data; |
---|
210 | |
---|
211 | uint32_t slot_index; |
---|
212 | |
---|
213 | if (unlikely(private_data->free_list_size == 0)) { |
---|
214 | return -ENOMEM; |
---|
215 | } |
---|
216 | |
---|
217 | slot_index = private_data->free_list[--private_data->free_list_size]; |
---|
218 | |
---|
219 | /* Copy the grant information into file's private data. */ |
---|
220 | private_data->grants[slot_index].state = GNTDEV_SLOT_NOT_YET_MAPPED; |
---|
221 | private_data->grants[slot_index].u.valid.domid = op->domid; |
---|
222 | private_data->grants[slot_index].u.valid.ref = op->ref; |
---|
223 | |
---|
224 | /* The offset is calculated as the index of the chosen entry in the |
---|
225 | * file's private data's array of grant information. This is then |
---|
226 | * shifted to give an offset into the virtual "file address space". |
---|
227 | */ |
---|
228 | *offset = slot_index << PAGE_SHIFT; |
---|
229 | |
---|
230 | return 0; |
---|
231 | } |
---|
232 | |
---|
233 | /* Adds the @count grant references to the contiguous range in the slot array |
---|
234 | * beginning at @first_slot. It is assumed that @first_slot was returned by a |
---|
235 | * previous invocation of find_contiguous_free_range(), during the same |
---|
236 | * invocation of the driver. |
---|
237 | */ |
---|
238 | static int add_grant_references(struct file *flip, |
---|
239 | int count, |
---|
240 | struct ioctl_gntdev_grant_ref *ops, |
---|
241 | uint32_t first_slot) |
---|
242 | { |
---|
243 | gntdev_file_private_data_t *private_data |
---|
244 | = (gntdev_file_private_data_t *) flip->private_data; |
---|
245 | int i; |
---|
246 | |
---|
247 | for (i = 0; i < count; ++i) { |
---|
248 | |
---|
249 | /* First, mark the slot's entry in the free list as invalid. */ |
---|
250 | int free_list_index = |
---|
251 | private_data->grants[first_slot+i].u.free_list_index; |
---|
252 | private_data->free_list[free_list_index] = |
---|
253 | GNTDEV_FREE_LIST_INVALID; |
---|
254 | |
---|
255 | /* Now, update the slot. */ |
---|
256 | private_data->grants[first_slot+i].state = |
---|
257 | GNTDEV_SLOT_NOT_YET_MAPPED; |
---|
258 | private_data->grants[first_slot+i].u.valid.domid = |
---|
259 | ops[i].domid; |
---|
260 | private_data->grants[first_slot+i].u.valid.ref = ops[i].ref; |
---|
261 | } |
---|
262 | |
---|
263 | return 0; |
---|
264 | } |
---|
265 | |
---|
266 | /* Scans through the free list for @flip, removing entries that are marked as |
---|
267 | * GNTDEV_SLOT_INVALID. This will reduce the recorded size of the free list to |
---|
268 | * the number of valid entries. |
---|
269 | */ |
---|
270 | static void compress_free_list(struct file *flip) |
---|
271 | { |
---|
272 | gntdev_file_private_data_t *private_data |
---|
273 | = (gntdev_file_private_data_t *) flip->private_data; |
---|
274 | int i, j = 0, old_size; |
---|
275 | |
---|
276 | old_size = private_data->free_list_size; |
---|
277 | for (i = 0; i < old_size; ++i) { |
---|
278 | if (private_data->free_list[i] != GNTDEV_FREE_LIST_INVALID) { |
---|
279 | private_data->free_list[j] = |
---|
280 | private_data->free_list[i]; |
---|
281 | ++j; |
---|
282 | } else { |
---|
283 | --private_data->free_list_size; |
---|
284 | } |
---|
285 | } |
---|
286 | } |
---|
287 | |
---|
288 | /* Searches the grant array in the private data of @flip for a range of |
---|
289 | * @num_slots contiguous slots in the GNTDEV_SLOT_INVALID state. |
---|
290 | * |
---|
291 | * Returns the index of the first slot if a range is found, otherwise -ENOMEM. |
---|
292 | */ |
---|
293 | static int find_contiguous_free_range(struct file *flip, |
---|
294 | uint32_t num_slots) |
---|
295 | { |
---|
296 | gntdev_file_private_data_t *private_data |
---|
297 | = (gntdev_file_private_data_t *) flip->private_data; |
---|
298 | |
---|
299 | int i; |
---|
300 | int start_index = private_data->next_fit_index; |
---|
301 | int range_start = 0, range_length; |
---|
302 | |
---|
303 | if (private_data->free_list_size < num_slots) { |
---|
304 | return -ENOMEM; |
---|
305 | } |
---|
306 | |
---|
307 | /* First search from the start_index to the end of the array. */ |
---|
308 | range_length = 0; |
---|
309 | for (i = start_index; i < MAX_GRANTS; ++i) { |
---|
310 | if (private_data->grants[i].state == GNTDEV_SLOT_INVALID) { |
---|
311 | if (range_length == 0) { |
---|
312 | range_start = i; |
---|
313 | } |
---|
314 | ++range_length; |
---|
315 | if (range_length == num_slots) { |
---|
316 | return range_start; |
---|
317 | } |
---|
318 | } |
---|
319 | } |
---|
320 | |
---|
321 | /* Now search from the start of the array to the start_index. */ |
---|
322 | range_length = 0; |
---|
323 | for (i = 0; i < start_index; ++i) { |
---|
324 | if (private_data->grants[i].state == GNTDEV_SLOT_INVALID) { |
---|
325 | if (range_length == 0) { |
---|
326 | range_start = i; |
---|
327 | } |
---|
328 | ++range_length; |
---|
329 | if (range_length == num_slots) { |
---|
330 | return range_start; |
---|
331 | } |
---|
332 | } |
---|
333 | } |
---|
334 | |
---|
335 | return -ENOMEM; |
---|
336 | } |
---|
337 | |
---|
338 | /* Interface functions. */ |
---|
339 | |
---|
340 | /* Initialises the driver. Called when the module is loaded. */ |
---|
341 | static int __init gntdev_init(void) |
---|
342 | { |
---|
343 | struct class *class; |
---|
344 | struct class_device *device; |
---|
345 | |
---|
346 | if (!is_running_on_xen()) { |
---|
347 | printk(KERN_ERR "You must be running Xen to use gntdev\n"); |
---|
348 | return -ENODEV; |
---|
349 | } |
---|
350 | |
---|
351 | gntdev_major = register_chrdev(0, GNTDEV_NAME, &gntdev_fops); |
---|
352 | if (gntdev_major < 0) |
---|
353 | { |
---|
354 | printk(KERN_ERR "Could not register gntdev device\n"); |
---|
355 | return -ENOMEM; |
---|
356 | } |
---|
357 | |
---|
358 | /* Note that if the sysfs code fails, we will still initialise the |
---|
359 | * device, and output the major number so that the device can be |
---|
360 | * created manually using mknod. |
---|
361 | */ |
---|
362 | if ((class = get_xen_class()) == NULL) { |
---|
363 | printk(KERN_ERR "Error setting up xen_class\n"); |
---|
364 | printk(KERN_ERR "gntdev created with major number = %d\n", |
---|
365 | gntdev_major); |
---|
366 | return 0; |
---|
367 | } |
---|
368 | |
---|
369 | device = class_device_create(class, NULL, MKDEV(gntdev_major, 0), |
---|
370 | NULL, GNTDEV_NAME); |
---|
371 | if (IS_ERR(device)) { |
---|
372 | printk(KERN_ERR "Error creating gntdev device in xen_class\n"); |
---|
373 | printk(KERN_ERR "gntdev created with major number = %d\n", |
---|
374 | gntdev_major); |
---|
375 | return 0; |
---|
376 | } |
---|
377 | |
---|
378 | return 0; |
---|
379 | } |
---|
380 | |
---|
381 | /* Cleans up and unregisters the driver. Called when the driver is unloaded. |
---|
382 | */ |
---|
383 | static void __exit gntdev_exit(void) |
---|
384 | { |
---|
385 | struct class *class; |
---|
386 | if ((class = get_xen_class()) != NULL) |
---|
387 | class_device_destroy(class, MKDEV(gntdev_major, 0)); |
---|
388 | unregister_chrdev(gntdev_major, GNTDEV_NAME); |
---|
389 | } |
---|
390 | |
---|
391 | /* Called when the device is opened. */ |
---|
392 | static int gntdev_open(struct inode *inode, struct file *flip) |
---|
393 | { |
---|
394 | gntdev_file_private_data_t *private_data; |
---|
395 | int i; |
---|
396 | |
---|
397 | try_module_get(THIS_MODULE); |
---|
398 | |
---|
399 | /* Allocate space for the per-instance private data. */ |
---|
400 | private_data = kmalloc(sizeof(*private_data), GFP_KERNEL); |
---|
401 | if (!private_data) |
---|
402 | goto nomem_out; |
---|
403 | |
---|
404 | /* Allocate space for the kernel-mapping of granted pages. */ |
---|
405 | private_data->foreign_pages = |
---|
406 | alloc_empty_pages_and_pagevec(MAX_GRANTS); |
---|
407 | if (!private_data->foreign_pages) |
---|
408 | goto nomem_out2; |
---|
409 | |
---|
410 | /* Initialise the free-list, which contains all slots at first. |
---|
411 | */ |
---|
412 | for (i = 0; i < MAX_GRANTS; ++i) { |
---|
413 | private_data->free_list[MAX_GRANTS - i - 1] = i; |
---|
414 | private_data->grants[i].state = GNTDEV_SLOT_INVALID; |
---|
415 | private_data->grants[i].u.free_list_index = MAX_GRANTS - i - 1; |
---|
416 | } |
---|
417 | private_data->free_list_size = MAX_GRANTS; |
---|
418 | private_data->next_fit_index = 0; |
---|
419 | |
---|
420 | init_rwsem(&private_data->grants_sem); |
---|
421 | init_rwsem(&private_data->free_list_sem); |
---|
422 | |
---|
423 | flip->private_data = private_data; |
---|
424 | |
---|
425 | return 0; |
---|
426 | |
---|
427 | nomem_out2: |
---|
428 | kfree(private_data); |
---|
429 | nomem_out: |
---|
430 | return -ENOMEM; |
---|
431 | } |
---|
432 | |
---|
433 | /* Called when the device is closed. |
---|
434 | */ |
---|
435 | static int gntdev_release(struct inode *inode, struct file *flip) |
---|
436 | { |
---|
437 | if (flip->private_data) { |
---|
438 | gntdev_file_private_data_t *private_data = |
---|
439 | (gntdev_file_private_data_t *) flip->private_data; |
---|
440 | if (private_data->foreign_pages) { |
---|
441 | free_empty_pages_and_pagevec |
---|
442 | (private_data->foreign_pages, MAX_GRANTS); |
---|
443 | } |
---|
444 | kfree(private_data); |
---|
445 | } |
---|
446 | module_put(THIS_MODULE); |
---|
447 | return 0; |
---|
448 | } |
---|
449 | |
---|
450 | /* Called when an attempt is made to mmap() the device. The private data from |
---|
451 | * @flip contains the list of grant references that can be mapped. The vm_pgoff |
---|
452 | * field of @vma contains the index into that list that refers to the grant |
---|
453 | * reference that will be mapped. Only mappings that are a multiple of |
---|
454 | * PAGE_SIZE are handled. |
---|
455 | */ |
---|
456 | static int gntdev_mmap (struct file *flip, struct vm_area_struct *vma) |
---|
457 | { |
---|
458 | struct gnttab_map_grant_ref op; |
---|
459 | unsigned long slot_index = vma->vm_pgoff; |
---|
460 | unsigned long kernel_vaddr, user_vaddr; |
---|
461 | uint32_t size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; |
---|
462 | uint64_t ptep; |
---|
463 | int ret; |
---|
464 | int flags; |
---|
465 | int i; |
---|
466 | struct page *page; |
---|
467 | gntdev_file_private_data_t *private_data = flip->private_data; |
---|
468 | |
---|
469 | if (unlikely(!private_data)) { |
---|
470 | printk(KERN_ERR "File's private data is NULL.\n"); |
---|
471 | return -EINVAL; |
---|
472 | } |
---|
473 | |
---|
474 | if (unlikely((size <= 0) || (size + slot_index) > MAX_GRANTS)) { |
---|
475 | printk(KERN_ERR "Invalid number of pages or offset" |
---|
476 | "(num_pages = %d, first_slot = %ld).\n", |
---|
477 | size, slot_index); |
---|
478 | return -ENXIO; |
---|
479 | } |
---|
480 | |
---|
481 | if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_SHARED)) { |
---|
482 | printk(KERN_ERR "Writable mappings must be shared.\n"); |
---|
483 | return -EINVAL; |
---|
484 | } |
---|
485 | |
---|
486 | /* Slots must be in the NOT_YET_MAPPED state. */ |
---|
487 | down_write(&private_data->grants_sem); |
---|
488 | for (i = 0; i < size; ++i) { |
---|
489 | if (private_data->grants[slot_index + i].state != |
---|
490 | GNTDEV_SLOT_NOT_YET_MAPPED) { |
---|
491 | printk(KERN_ERR "Slot (index = %ld) is in the wrong " |
---|
492 | "state (%d).\n", slot_index + i, |
---|
493 | private_data->grants[slot_index + i].state); |
---|
494 | up_write(&private_data->grants_sem); |
---|
495 | return -EINVAL; |
---|
496 | } |
---|
497 | } |
---|
498 | |
---|
499 | /* Install the hook for unmapping. */ |
---|
500 | vma->vm_ops = &gntdev_vmops; |
---|
501 | |
---|
502 | /* The VM area contains pages from another VM. */ |
---|
503 | vma->vm_flags |= VM_FOREIGN; |
---|
504 | vma->vm_private_data = kzalloc(size * sizeof(struct page_struct *), |
---|
505 | GFP_KERNEL); |
---|
506 | if (vma->vm_private_data == NULL) { |
---|
507 | printk(KERN_ERR "Couldn't allocate mapping structure for VM " |
---|
508 | "area.\n"); |
---|
509 | return -ENOMEM; |
---|
510 | } |
---|
511 | |
---|
512 | /* This flag prevents Bad PTE errors when the memory is unmapped. */ |
---|
513 | vma->vm_flags |= VM_RESERVED; |
---|
514 | |
---|
515 | /* This flag prevents this VM area being copied on a fork(). A better |
---|
516 | * behaviour might be to explicitly carry out the appropriate mappings |
---|
517 | * on fork(), but I don't know if there's a hook for this. |
---|
518 | */ |
---|
519 | vma->vm_flags |= VM_DONTCOPY; |
---|
520 | |
---|
521 | #ifdef CONFIG_X86 |
---|
522 | /* This flag ensures that the page tables are not unpinned before the |
---|
523 | * VM area is unmapped. Therefore Xen still recognises the PTE as |
---|
524 | * belonging to an L1 pagetable, and the grant unmap operation will |
---|
525 | * succeed, even if the process does not exit cleanly. |
---|
526 | */ |
---|
527 | vma->vm_mm->context.has_foreign_mappings = 1; |
---|
528 | #endif |
---|
529 | |
---|
530 | for (i = 0; i < size; ++i) { |
---|
531 | |
---|
532 | flags = GNTMAP_host_map; |
---|
533 | if (!(vma->vm_flags & VM_WRITE)) |
---|
534 | flags |= GNTMAP_readonly; |
---|
535 | |
---|
536 | kernel_vaddr = get_kernel_vaddr(private_data, slot_index + i); |
---|
537 | user_vaddr = get_user_vaddr(vma, i); |
---|
538 | page = pfn_to_page(__pa(kernel_vaddr) >> PAGE_SHIFT); |
---|
539 | |
---|
540 | gnttab_set_map_op(&op, kernel_vaddr, flags, |
---|
541 | private_data->grants[slot_index+i] |
---|
542 | .u.valid.ref, |
---|
543 | private_data->grants[slot_index+i] |
---|
544 | .u.valid.domid); |
---|
545 | |
---|
546 | /* Carry out the mapping of the grant reference. */ |
---|
547 | ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, |
---|
548 | &op, 1); |
---|
549 | BUG_ON(ret); |
---|
550 | if (op.status) { |
---|
551 | printk(KERN_ERR "Error mapping the grant reference " |
---|
552 | "into the kernel (%d). domid = %d; ref = %d\n", |
---|
553 | op.status, |
---|
554 | private_data->grants[slot_index+i] |
---|
555 | .u.valid.domid, |
---|
556 | private_data->grants[slot_index+i] |
---|
557 | .u.valid.ref); |
---|
558 | goto undo_map_out; |
---|
559 | } |
---|
560 | |
---|
561 | /* Store a reference to the page that will be mapped into user |
---|
562 | * space. |
---|
563 | */ |
---|
564 | ((struct page **) vma->vm_private_data)[i] = page; |
---|
565 | |
---|
566 | /* Mark mapped page as reserved. */ |
---|
567 | SetPageReserved(page); |
---|
568 | |
---|
569 | /* Record the grant handle, for use in the unmap operation. */ |
---|
570 | private_data->grants[slot_index+i].u.valid.kernel_handle = |
---|
571 | op.handle; |
---|
572 | private_data->grants[slot_index+i].u.valid.dev_bus_addr = |
---|
573 | op.dev_bus_addr; |
---|
574 | |
---|
575 | private_data->grants[slot_index+i].state = GNTDEV_SLOT_MAPPED; |
---|
576 | private_data->grants[slot_index+i].u.valid.user_handle = |
---|
577 | GNTDEV_INVALID_HANDLE; |
---|
578 | |
---|
579 | /* Now perform the mapping to user space. */ |
---|
580 | if (!xen_feature(XENFEAT_auto_translated_physmap)) { |
---|
581 | |
---|
582 | /* NOT USING SHADOW PAGE TABLES. */ |
---|
583 | /* In this case, we map the grant(s) straight into user |
---|
584 | * space. |
---|
585 | */ |
---|
586 | |
---|
587 | /* Get the machine address of the PTE for the user |
---|
588 | * page. |
---|
589 | */ |
---|
590 | if ((ret = create_lookup_pte_addr(vma->vm_mm, |
---|
591 | vma->vm_start |
---|
592 | + (i << PAGE_SHIFT), |
---|
593 | &ptep))) |
---|
594 | { |
---|
595 | printk(KERN_ERR "Error obtaining PTE pointer " |
---|
596 | "(%d).\n", ret); |
---|
597 | goto undo_map_out; |
---|
598 | } |
---|
599 | |
---|
600 | /* Configure the map operation. */ |
---|
601 | |
---|
602 | /* The reference is to be used by host CPUs. */ |
---|
603 | flags = GNTMAP_host_map; |
---|
604 | |
---|
605 | /* Specifies a user space mapping. */ |
---|
606 | flags |= GNTMAP_application_map; |
---|
607 | |
---|
608 | /* The map request contains the machine address of the |
---|
609 | * PTE to update. |
---|
610 | */ |
---|
611 | flags |= GNTMAP_contains_pte; |
---|
612 | |
---|
613 | if (!(vma->vm_flags & VM_WRITE)) |
---|
614 | flags |= GNTMAP_readonly; |
---|
615 | |
---|
616 | gnttab_set_map_op(&op, ptep, flags, |
---|
617 | private_data->grants[slot_index+i] |
---|
618 | .u.valid.ref, |
---|
619 | private_data->grants[slot_index+i] |
---|
620 | .u.valid.domid); |
---|
621 | |
---|
622 | /* Carry out the mapping of the grant reference. */ |
---|
623 | ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, |
---|
624 | &op, 1); |
---|
625 | BUG_ON(ret); |
---|
626 | if (op.status) { |
---|
627 | printk(KERN_ERR "Error mapping the grant " |
---|
628 | "reference into user space (%d). domid " |
---|
629 | "= %d; ref = %d\n", op.status, |
---|
630 | private_data->grants[slot_index+i].u |
---|
631 | .valid.domid, |
---|
632 | private_data->grants[slot_index+i].u |
---|
633 | .valid.ref); |
---|
634 | goto undo_map_out; |
---|
635 | } |
---|
636 | |
---|
637 | /* Record the grant handle, for use in the unmap |
---|
638 | * operation. |
---|
639 | */ |
---|
640 | private_data->grants[slot_index+i].u. |
---|
641 | valid.user_handle = op.handle; |
---|
642 | |
---|
643 | /* Update p2m structure with the new mapping. */ |
---|
644 | set_phys_to_machine(__pa(kernel_vaddr) >> PAGE_SHIFT, |
---|
645 | FOREIGN_FRAME(private_data-> |
---|
646 | grants[slot_index+i] |
---|
647 | .u.valid.dev_bus_addr |
---|
648 | >> PAGE_SHIFT)); |
---|
649 | } else { |
---|
650 | /* USING SHADOW PAGE TABLES. */ |
---|
651 | /* In this case, we simply insert the page into the VM |
---|
652 | * area. */ |
---|
653 | ret = vm_insert_page(vma, user_vaddr, page); |
---|
654 | } |
---|
655 | |
---|
656 | } |
---|
657 | |
---|
658 | up_write(&private_data->grants_sem); |
---|
659 | return 0; |
---|
660 | |
---|
661 | undo_map_out: |
---|
662 | /* If we have a mapping failure, the unmapping will be taken care of |
---|
663 | * by do_mmap_pgoff(), which will eventually call gntdev_clear_pte(). |
---|
664 | * All we need to do here is free the vma_private_data. |
---|
665 | */ |
---|
666 | kfree(vma->vm_private_data); |
---|
667 | |
---|
668 | /* THIS IS VERY UNPLEASANT: do_mmap_pgoff() will set the vma->vm_file |
---|
669 | * to NULL on failure. However, we need this in gntdev_clear_pte() to |
---|
670 | * unmap the grants. Therefore, we smuggle a reference to the file's |
---|
671 | * private data in the VM area's private data pointer. |
---|
672 | */ |
---|
673 | vma->vm_private_data = private_data; |
---|
674 | |
---|
675 | up_write(&private_data->grants_sem); |
---|
676 | |
---|
677 | return -ENOMEM; |
---|
678 | } |
---|
679 | |
---|
680 | static pte_t gntdev_clear_pte(struct vm_area_struct *vma, unsigned long addr, |
---|
681 | pte_t *ptep, int is_fullmm) |
---|
682 | { |
---|
683 | int slot_index, ret; |
---|
684 | pte_t copy; |
---|
685 | struct gnttab_unmap_grant_ref op; |
---|
686 | gntdev_file_private_data_t *private_data; |
---|
687 | |
---|
688 | /* THIS IS VERY UNPLEASANT: do_mmap_pgoff() will set the vma->vm_file |
---|
689 | * to NULL on failure. However, we need this in gntdev_clear_pte() to |
---|
690 | * unmap the grants. Therefore, we smuggle a reference to the file's |
---|
691 | * private data in the VM area's private data pointer. |
---|
692 | */ |
---|
693 | if (vma->vm_file) { |
---|
694 | private_data = (gntdev_file_private_data_t *) |
---|
695 | vma->vm_file->private_data; |
---|
696 | } else if (vma->vm_private_data) { |
---|
697 | private_data = (gntdev_file_private_data_t *) |
---|
698 | vma->vm_private_data; |
---|
699 | } else { |
---|
700 | private_data = NULL; /* gcc warning */ |
---|
701 | BUG(); |
---|
702 | } |
---|
703 | |
---|
704 | /* Copy the existing value of the PTE for returning. */ |
---|
705 | copy = *ptep; |
---|
706 | |
---|
707 | /* Calculate the grant relating to this PTE. */ |
---|
708 | slot_index = vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT); |
---|
709 | |
---|
710 | /* Only unmap grants if the slot has been mapped. This could be being |
---|
711 | * called from a failing mmap(). |
---|
712 | */ |
---|
713 | if (private_data->grants[slot_index].state == GNTDEV_SLOT_MAPPED) { |
---|
714 | |
---|
715 | /* First, we clear the user space mapping, if it has been made. |
---|
716 | */ |
---|
717 | if (private_data->grants[slot_index].u.valid.user_handle != |
---|
718 | GNTDEV_INVALID_HANDLE && |
---|
719 | !xen_feature(XENFEAT_auto_translated_physmap)) { |
---|
720 | /* NOT USING SHADOW PAGE TABLES. */ |
---|
721 | gnttab_set_unmap_op(&op, virt_to_machine(ptep), |
---|
722 | GNTMAP_contains_pte, |
---|
723 | private_data->grants[slot_index] |
---|
724 | .u.valid.user_handle); |
---|
725 | ret = HYPERVISOR_grant_table_op( |
---|
726 | GNTTABOP_unmap_grant_ref, &op, 1); |
---|
727 | BUG_ON(ret); |
---|
728 | if (op.status) |
---|
729 | printk("User unmap grant status = %d\n", |
---|
730 | op.status); |
---|
731 | } else { |
---|
732 | /* USING SHADOW PAGE TABLES. */ |
---|
733 | pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm); |
---|
734 | } |
---|
735 | |
---|
736 | /* Finally, we unmap the grant from kernel space. */ |
---|
737 | gnttab_set_unmap_op(&op, |
---|
738 | get_kernel_vaddr(private_data, slot_index), |
---|
739 | GNTMAP_host_map, |
---|
740 | private_data->grants[slot_index].u.valid |
---|
741 | .kernel_handle); |
---|
742 | ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, |
---|
743 | &op, 1); |
---|
744 | BUG_ON(ret); |
---|
745 | if (op.status) |
---|
746 | printk("Kernel unmap grant status = %d\n", op.status); |
---|
747 | |
---|
748 | |
---|
749 | /* Return slot to the not-yet-mapped state, so that it may be |
---|
750 | * mapped again, or removed by a subsequent ioctl. |
---|
751 | */ |
---|
752 | private_data->grants[slot_index].state = |
---|
753 | GNTDEV_SLOT_NOT_YET_MAPPED; |
---|
754 | |
---|
755 | /* Invalidate the physical to machine mapping for this page. */ |
---|
756 | set_phys_to_machine(__pa(get_kernel_vaddr(private_data, |
---|
757 | slot_index)) |
---|
758 | >> PAGE_SHIFT, INVALID_P2M_ENTRY); |
---|
759 | |
---|
760 | } else { |
---|
761 | pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm); |
---|
762 | } |
---|
763 | |
---|
764 | return copy; |
---|
765 | } |
---|
766 | |
---|
767 | /* "Destructor" for a VM area. |
---|
768 | */ |
---|
769 | static void gntdev_vma_close(struct vm_area_struct *vma) { |
---|
770 | if (vma->vm_private_data) { |
---|
771 | kfree(vma->vm_private_data); |
---|
772 | } |
---|
773 | } |
---|
774 | |
---|
775 | /* Called when an ioctl is made on the device. |
---|
776 | */ |
---|
777 | static int gntdev_ioctl(struct inode *inode, struct file *flip, |
---|
778 | unsigned int cmd, unsigned long arg) |
---|
779 | { |
---|
780 | int rc = 0; |
---|
781 | gntdev_file_private_data_t *private_data = |
---|
782 | (gntdev_file_private_data_t *) flip->private_data; |
---|
783 | |
---|
784 | switch (cmd) { |
---|
785 | case IOCTL_GNTDEV_MAP_GRANT_REF: |
---|
786 | { |
---|
787 | struct ioctl_gntdev_map_grant_ref op; |
---|
788 | down_write(&private_data->grants_sem); |
---|
789 | down_write(&private_data->free_list_sem); |
---|
790 | |
---|
791 | if ((rc = copy_from_user(&op, (void __user *) arg, |
---|
792 | sizeof(op)))) { |
---|
793 | rc = -EFAULT; |
---|
794 | goto map_out; |
---|
795 | } |
---|
796 | if (unlikely(op.count <= 0)) { |
---|
797 | rc = -EINVAL; |
---|
798 | goto map_out; |
---|
799 | } |
---|
800 | |
---|
801 | if (op.count == 1) { |
---|
802 | if ((rc = add_grant_reference(flip, &op.refs[0], |
---|
803 | &op.index)) < 0) { |
---|
804 | printk(KERN_ERR "Adding grant reference " |
---|
805 | "failed (%d).\n", rc); |
---|
806 | goto map_out; |
---|
807 | } |
---|
808 | } else { |
---|
809 | struct ioctl_gntdev_grant_ref *refs, *u; |
---|
810 | refs = kmalloc(op.count * sizeof(*refs), GFP_KERNEL); |
---|
811 | if (!refs) { |
---|
812 | rc = -ENOMEM; |
---|
813 | goto map_out; |
---|
814 | } |
---|
815 | u = ((struct ioctl_gntdev_map_grant_ref *)arg)->refs; |
---|
816 | if ((rc = copy_from_user(refs, |
---|
817 | (void __user *)u, |
---|
818 | sizeof(*refs) * op.count))) { |
---|
819 | printk(KERN_ERR "Copying refs from user failed" |
---|
820 | " (%d).\n", rc); |
---|
821 | rc = -EINVAL; |
---|
822 | goto map_out; |
---|
823 | } |
---|
824 | if ((rc = find_contiguous_free_range(flip, op.count)) |
---|
825 | < 0) { |
---|
826 | printk(KERN_ERR "Finding contiguous range " |
---|
827 | "failed (%d).\n", rc); |
---|
828 | kfree(refs); |
---|
829 | goto map_out; |
---|
830 | } |
---|
831 | op.index = rc << PAGE_SHIFT; |
---|
832 | if ((rc = add_grant_references(flip, op.count, |
---|
833 | refs, rc))) { |
---|
834 | printk(KERN_ERR "Adding grant references " |
---|
835 | "failed (%d).\n", rc); |
---|
836 | kfree(refs); |
---|
837 | goto map_out; |
---|
838 | } |
---|
839 | compress_free_list(flip); |
---|
840 | kfree(refs); |
---|
841 | } |
---|
842 | if ((rc = copy_to_user((void __user *) arg, |
---|
843 | &op, |
---|
844 | sizeof(op)))) { |
---|
845 | printk(KERN_ERR "Copying result back to user failed " |
---|
846 | "(%d)\n", rc); |
---|
847 | rc = -EFAULT; |
---|
848 | goto map_out; |
---|
849 | } |
---|
850 | map_out: |
---|
851 | up_write(&private_data->grants_sem); |
---|
852 | up_write(&private_data->free_list_sem); |
---|
853 | return rc; |
---|
854 | } |
---|
855 | case IOCTL_GNTDEV_UNMAP_GRANT_REF: |
---|
856 | { |
---|
857 | struct ioctl_gntdev_unmap_grant_ref op; |
---|
858 | int i, start_index; |
---|
859 | |
---|
860 | down_write(&private_data->grants_sem); |
---|
861 | down_write(&private_data->free_list_sem); |
---|
862 | |
---|
863 | if ((rc = copy_from_user(&op, |
---|
864 | (void __user *) arg, |
---|
865 | sizeof(op)))) { |
---|
866 | rc = -EFAULT; |
---|
867 | goto unmap_out; |
---|
868 | } |
---|
869 | |
---|
870 | start_index = op.index >> PAGE_SHIFT; |
---|
871 | |
---|
872 | /* First, check that all pages are in the NOT_YET_MAPPED |
---|
873 | * state. |
---|
874 | */ |
---|
875 | for (i = 0; i < op.count; ++i) { |
---|
876 | if (unlikely |
---|
877 | (private_data->grants[start_index + i].state |
---|
878 | != GNTDEV_SLOT_NOT_YET_MAPPED)) { |
---|
879 | if (private_data->grants[start_index + i].state |
---|
880 | == GNTDEV_SLOT_INVALID) { |
---|
881 | printk(KERN_ERR |
---|
882 | "Tried to remove an invalid " |
---|
883 | "grant at offset 0x%x.", |
---|
884 | (start_index + i) |
---|
885 | << PAGE_SHIFT); |
---|
886 | rc = -EINVAL; |
---|
887 | } else { |
---|
888 | printk(KERN_ERR |
---|
889 | "Tried to remove a grant which " |
---|
890 | "is currently mmap()-ed at " |
---|
891 | "offset 0x%x.", |
---|
892 | (start_index + i) |
---|
893 | << PAGE_SHIFT); |
---|
894 | rc = -EBUSY; |
---|
895 | } |
---|
896 | goto unmap_out; |
---|
897 | } |
---|
898 | } |
---|
899 | |
---|
900 | /* Unmap pages and add them to the free list. |
---|
901 | */ |
---|
902 | for (i = 0; i < op.count; ++i) { |
---|
903 | private_data->grants[start_index+i].state = |
---|
904 | GNTDEV_SLOT_INVALID; |
---|
905 | private_data->grants[start_index+i].u.free_list_index = |
---|
906 | private_data->free_list_size; |
---|
907 | private_data->free_list[private_data->free_list_size] = |
---|
908 | start_index + i; |
---|
909 | ++private_data->free_list_size; |
---|
910 | } |
---|
911 | compress_free_list(flip); |
---|
912 | |
---|
913 | unmap_out: |
---|
914 | up_write(&private_data->grants_sem); |
---|
915 | up_write(&private_data->free_list_sem); |
---|
916 | return rc; |
---|
917 | } |
---|
918 | case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR: |
---|
919 | { |
---|
920 | struct ioctl_gntdev_get_offset_for_vaddr op; |
---|
921 | struct vm_area_struct *vma; |
---|
922 | unsigned long vaddr; |
---|
923 | |
---|
924 | if ((rc = copy_from_user(&op, |
---|
925 | (void __user *) arg, |
---|
926 | sizeof(op)))) { |
---|
927 | rc = -EFAULT; |
---|
928 | goto get_offset_out; |
---|
929 | } |
---|
930 | vaddr = (unsigned long)op.vaddr; |
---|
931 | |
---|
932 | down_read(¤t->mm->mmap_sem); |
---|
933 | vma = find_vma(current->mm, vaddr); |
---|
934 | if (vma == NULL) { |
---|
935 | rc = -EFAULT; |
---|
936 | goto get_offset_unlock_out; |
---|
937 | } |
---|
938 | if ((!vma->vm_ops) || (vma->vm_ops != &gntdev_vmops)) { |
---|
939 | printk(KERN_ERR "The vaddr specified does not belong " |
---|
940 | "to a gntdev instance: %#lx\n", vaddr); |
---|
941 | rc = -EFAULT; |
---|
942 | goto get_offset_unlock_out; |
---|
943 | } |
---|
944 | if (vma->vm_start != vaddr) { |
---|
945 | printk(KERN_ERR "The vaddr specified in an " |
---|
946 | "IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR must be at " |
---|
947 | "the start of the VM area. vma->vm_start = " |
---|
948 | "%#lx; vaddr = %#lx\n", |
---|
949 | vma->vm_start, vaddr); |
---|
950 | rc = -EFAULT; |
---|
951 | goto get_offset_unlock_out; |
---|
952 | } |
---|
953 | op.offset = vma->vm_pgoff << PAGE_SHIFT; |
---|
954 | op.count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; |
---|
955 | up_read(¤t->mm->mmap_sem); |
---|
956 | if ((rc = copy_to_user((void __user *) arg, |
---|
957 | &op, |
---|
958 | sizeof(op)))) { |
---|
959 | rc = -EFAULT; |
---|
960 | goto get_offset_out; |
---|
961 | } |
---|
962 | goto get_offset_out; |
---|
963 | get_offset_unlock_out: |
---|
964 | up_read(¤t->mm->mmap_sem); |
---|
965 | get_offset_out: |
---|
966 | return rc; |
---|
967 | } |
---|
968 | default: |
---|
969 | return -ENOIOCTLCMD; |
---|
970 | } |
---|
971 | |
---|
972 | return 0; |
---|
973 | } |
---|