# HG changeset patch
# User kfraser@localhost.localdomain
# Node ID 67a06a9b7b1dca707e1cd3b08ae0a341d6e97b3d
# Parent 3f0ca90351e268084fbdb733d70fc596cb46537d
[HVM] qemu: Add guest address-space mapping cache.

On IA32 and IA32 PAE hosts it is currently all but impossible to create
an HVM guest with more than 2GB of memory: Qemu generally cannot find a
single contiguous virtual address range large enough to map the guest's
entire physical address space. This patch fixes the issue by mapping
guest memory dynamically, in small fixed-size blocks (buckets), and
caching those mappings.
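
To illustrate the scheme (an editor's sketch, not part of the patch):
each guest-physical address splits into a bucket index and an offset
within the bucket, so only the bucket actually being touched needs a
live mapping. A minimal standalone demonstration of that decomposition,
using the i386 constants from the patch (the main() driver and printf
are illustrative only):

    #include <stdio.h>
    #include <stdint.h>

    #define MCACHE_BUCKET_SHIFT 16                      /* i386: 64KB buckets */
    #define MCACHE_BUCKET_SIZE  (1UL << MCACHE_BUCKET_SHIFT)

    int main(void)
    {
        uint64_t phys_addr = 0x81234567; /* example guest-physical address */

        /* High bits select the bucket, low bits the byte inside it. */
        unsigned long index  = phys_addr >> MCACHE_BUCKET_SHIFT;
        unsigned long offset = phys_addr & (MCACHE_BUCKET_SIZE - 1);

        /* qemu_map_cache() hashes index into a fixed array of buckets,
         * remaps the bucket via xc_map_foreign_batch() on a miss, and
         * returns entry->vaddr_base + offset. */
        printf("bucket index %#lx, offset %#lx\n", index, offset);
        return 0;
    }

At most one bucket (64KB on i386, 1MB on x86_64) is mapped per cache
entry, so the device model's virtual address space consumption is
bounded by MAX_MCACHE_SIZE rather than by the guest's memory size.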

Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
Signed-off-by: Dexuan Cui <dexuan.cui@intel.com>
Signed-off-by: Keir Fraser <keir@xensource.com>

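A note on locking (again an illustrative sketch, not part of the
patch): when the guest's physical memory map changes (e.g. pages are
ballooned out), Xen sends an IOREQ_TYPE_INVALIDATE request and
qemu_invalidate_map_cache() tears down every bucket under
mapcache_mutex. The mutex is created recursive so that
cpu_physical_memory_rw(), which holds it around the whole copy loop,
can nest further acquisitions without deadlocking. The initialization
pattern used by the patch looks like this:

    #include <pthread.h>

    static pthread_mutex_t mapcache_mutex;

    /* A recursive mutex may be re-locked by the thread that already
     * owns it; each lock must be balanced by an unlock. */
    static void mapcache_mutex_init(void)
    {
        pthread_mutexattr_t attr;

        pthread_mutexattr_init(&attr);
        pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
        pthread_mutex_init(&mapcache_mutex, &attr);
        pthread_mutexattr_destroy(&attr);
    }

    int main(void)
    {
        mapcache_mutex_init();
        pthread_mutex_lock(&mapcache_mutex);
        pthread_mutex_lock(&mapcache_mutex);   /* relock: no deadlock */
        pthread_mutex_unlock(&mapcache_mutex);
        pthread_mutex_unlock(&mapcache_mutex);
        return 0;
    }
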
Index: ioemu/vl.c
===================================================================
--- ioemu.orig/vl.c	2007-05-03 15:12:21.000000000 +0100
+++ ioemu/vl.c	2007-05-03 15:12:41.000000000 +0100
@@ -286,7 +286,7 @@
     for(i = start; i < start + length; i += size) {
         ioport_write_table[bsize][i] = func;
         if (ioport_opaque[i] != NULL && ioport_opaque[i] != opaque)
-            hw_error("register_ioport_read: invalid opaque");
+            hw_error("register_ioport_write: invalid opaque");
         ioport_opaque[i] = opaque;
     }
     return 0;
@@ -5894,6 +5894,157 @@
     suspend_requested = 1;
 }
 
+#if defined(MAPCACHE)
+
+#if defined(__i386__)
+#define MAX_MCACHE_SIZE     0x40000000   /* 1GB max for x86 */
+#define MCACHE_BUCKET_SHIFT 16
+#elif defined(__x86_64__)
+#define MAX_MCACHE_SIZE     0x1000000000 /* 64GB max for x86_64 */
+#define MCACHE_BUCKET_SHIFT 20
+#endif
+
+#define MCACHE_BUCKET_SIZE  (1UL << MCACHE_BUCKET_SHIFT)
+
+#define BITS_PER_LONG (sizeof(long)*8)
+#define BITS_TO_LONGS(bits) \
+    (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG)
+#define DECLARE_BITMAP(name,bits) \
+    unsigned long name[BITS_TO_LONGS(bits)]
+#define test_bit(bit,map) \
+    (!!((map)[(bit)/BITS_PER_LONG] & (1UL << ((bit)%BITS_PER_LONG))))
+
+struct map_cache {
+    unsigned long paddr_index;
+    uint8_t      *vaddr_base;
+    DECLARE_BITMAP(valid_mapping, MCACHE_BUCKET_SIZE>>PAGE_SHIFT);
+};
+
+static struct map_cache *mapcache_entry;
+static unsigned long nr_buckets;
+
+/* For most cases (>99.9%), the page address is the same. */
+static unsigned long last_address_index = ~0UL;
+static uint8_t      *last_address_vaddr;
+
+static int qemu_map_cache_init(void)
+{
+    unsigned long size;
+
+    nr_buckets = (((MAX_MCACHE_SIZE >> PAGE_SHIFT) +
+                   (1UL << (MCACHE_BUCKET_SHIFT - PAGE_SHIFT)) - 1) >>
+                  (MCACHE_BUCKET_SHIFT - PAGE_SHIFT));
+    fprintf(logfile, "qemu_map_cache_init nr_buckets = %lx\n", nr_buckets);
+
+    /*
+     * Use mmap() directly: lets us allocate a big hash table with no up-front
+     * cost in storage space. The OS will allocate memory only for the buckets
+     * that we actually use. All others will contain all zeroes.
+     */
+    size = nr_buckets * sizeof(struct map_cache);
+    size = (size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
+    mapcache_entry = mmap(NULL, size, PROT_READ|PROT_WRITE,
+                          MAP_SHARED|MAP_ANONYMOUS, 0, 0);
+    if (mapcache_entry == MAP_FAILED) {
+        errno = ENOMEM;
+        return -1;
+    }
+
+    return 0;
+}
+
+static void qemu_remap_bucket(struct map_cache *entry,
+                              unsigned long address_index)
+{
+    uint8_t *vaddr_base;
+    unsigned long pfns[MCACHE_BUCKET_SIZE >> PAGE_SHIFT];
+    unsigned int i, j;
+
+    if (entry->vaddr_base != NULL) {
+        /* munmap() returns -1 and sets errno on failure */
+        if (munmap(entry->vaddr_base, MCACHE_BUCKET_SIZE) != 0) {
+            fprintf(logfile, "unmap fails %d\n", errno);
+            exit(-1);
+        }
+    }
+
+    for (i = 0; i < MCACHE_BUCKET_SIZE >> PAGE_SHIFT; i++)
+        pfns[i] = (address_index << (MCACHE_BUCKET_SHIFT-PAGE_SHIFT)) + i;
+
+    vaddr_base = xc_map_foreign_batch(xc_handle, domid, PROT_READ|PROT_WRITE,
+                                      pfns, MCACHE_BUCKET_SIZE >> PAGE_SHIFT);
+    if (vaddr_base == NULL) {
+        fprintf(logfile, "xc_map_foreign_batch error %d\n", errno);
+        exit(-1);
+    }
+
+    entry->vaddr_base = vaddr_base;
+    entry->paddr_index = address_index;
+
+    for (i = 0; i < MCACHE_BUCKET_SIZE >> PAGE_SHIFT; i += BITS_PER_LONG) {
+        unsigned long word = 0;
+        j = ((i + BITS_PER_LONG) > (MCACHE_BUCKET_SIZE >> PAGE_SHIFT)) ?
+            (MCACHE_BUCKET_SIZE >> PAGE_SHIFT) % BITS_PER_LONG : BITS_PER_LONG;
+        while (j > 0)
+            word = (word << 1) | !(pfns[i + --j] & 0xF0000000UL);
+        entry->valid_mapping[i / BITS_PER_LONG] = word;
+    }
+}
+
+uint8_t *qemu_map_cache(target_phys_addr_t phys_addr)
+{
+    struct map_cache *entry;
+    unsigned long address_index = phys_addr >> MCACHE_BUCKET_SHIFT;
+    unsigned long address_offset = phys_addr & (MCACHE_BUCKET_SIZE-1);
+
+    if (address_index == last_address_index)
+        return last_address_vaddr + address_offset;
+
+    entry = &mapcache_entry[address_index % nr_buckets];
+
+    if (entry->vaddr_base == NULL || entry->paddr_index != address_index ||
+        !test_bit(address_offset>>PAGE_SHIFT, entry->valid_mapping))
+        qemu_remap_bucket(entry, address_index);
+
+    if (!test_bit(address_offset>>PAGE_SHIFT, entry->valid_mapping))
+        return NULL;
+
+    last_address_index = address_index;
+    last_address_vaddr = entry->vaddr_base;
+
+    return last_address_vaddr + address_offset;
+}
+
+void qemu_invalidate_map_cache(void)
+{
+    unsigned long i;
+
+    mapcache_lock();
+
+    for (i = 0; i < nr_buckets; i++) {
+        struct map_cache *entry = &mapcache_entry[i];
+
+        if (entry->vaddr_base == NULL)
+            continue;
+
+        /* munmap() returns -1 and sets errno on failure */
+        if (munmap(entry->vaddr_base, MCACHE_BUCKET_SIZE) != 0) {
+            fprintf(logfile, "unmap fails %d\n", errno);
+            exit(-1);
+        }
+
+        entry->paddr_index = 0;
+        entry->vaddr_base = NULL;
+    }
+
+    last_address_index = ~0UL;
+    last_address_vaddr = NULL;
+
+    mapcache_unlock();
+}
+
+#endif /* defined(MAPCACHE) */
+
 int main(int argc, char **argv)
 {
 #ifdef CONFIG_GDBSTUB
@@ -5930,8 +6081,11 @@
     unsigned long ioreq_pfn;
     extern void *shared_page;
     extern void *buffered_io_page;
-    extern void *buffered_pio_page;
+#ifdef __ia64__
     unsigned long nr_pages;
+    xen_pfn_t *page_array;
+    extern void *buffered_pio_page;
+#endif
 
     char qemu_dm_logfilename[64];
 
@@ -6221,7 +6375,7 @@
             break;
         case QEMU_OPTION_m:
-            ram_size = atol(optarg) * 1024 * 1024;
+            ram_size = (uint64_t)atol(optarg) * 1024 * 1024;
             if (ram_size <= 0)
                 help();
 #ifndef CONFIG_DM
@@ -6482,30 +6637,15 @@
 
 #if defined(__i386__) || defined(__x86_64__)
 
-    nr_pages = ram_size/PAGE_SIZE;
-
-    page_array = (xen_pfn_t *)malloc(nr_pages * sizeof(xen_pfn_t));
-    if (page_array == NULL) {
-        fprintf(logfile, "malloc returned error %d\n", errno);
-        exit(-1);
-    }
-
-    for ( i = 0; i < nr_pages; i++)
-        page_array[i] = i;
-
-    phys_ram_base = xc_map_foreign_batch(xc_handle, domid,
-                                         PROT_READ|PROT_WRITE, page_array,
-                                         nr_pages);
-    if (phys_ram_base == NULL) {
-        fprintf(logfile, "batch map guest memory returned error %d\n", errno);
+    if (qemu_map_cache_init()) {
+        fprintf(logfile, "qemu_map_cache_init returned: error %d\n", errno);
         exit(-1);
     }
 
     xc_get_hvm_param(xc_handle, domid, HVM_PARAM_IOREQ_PFN, &ioreq_pfn);
     fprintf(logfile, "shared page at pfn %lx\n", ioreq_pfn);
     shared_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
-                                       PROT_READ|PROT_WRITE,
-                                       page_array[ioreq_pfn]);
+                                       PROT_READ|PROT_WRITE, ioreq_pfn);
     if (shared_page == NULL) {
         fprintf(logfile, "map shared IO page returned error %d\n", errno);
         exit(-1);
@@ -6514,15 +6654,12 @@
     xc_get_hvm_param(xc_handle, domid, HVM_PARAM_BUFIOREQ_PFN, &ioreq_pfn);
     fprintf(logfile, "buffered io page at pfn %lx\n", ioreq_pfn);
     buffered_io_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
-                                            PROT_READ|PROT_WRITE,
-                                            page_array[ioreq_pfn]);
+                                            PROT_READ|PROT_WRITE, ioreq_pfn);
     if (buffered_io_page == NULL) {
         fprintf(logfile, "map buffered IO page returned error %d\n", errno);
         exit(-1);
     }
 
-    free(page_array);
-
 #elif defined(__ia64__)
 
     nr_pages = ram_size/PAGE_SIZE;
Index: ioemu/target-i386-dm/exec-dm.c
===================================================================
--- ioemu.orig/target-i386-dm/exec-dm.c	2007-05-03 15:10:22.000000000 +0100
+++ ioemu/target-i386-dm/exec-dm.c	2007-05-03 15:12:34.000000000 +0100
@@ -36,6 +36,7 @@
 
 #include "cpu.h"
 #include "exec-all.h"
+#include "vl.h"
 
 //#define DEBUG_TB_INVALIDATE
 //#define DEBUG_FLUSH
@@ -127,10 +128,17 @@
 FILE *logfile;
 int loglevel;
 
+#ifdef MAPCACHE
+pthread_mutex_t mapcache_mutex;
+#endif
+
 void cpu_exec_init(CPUState *env)
 {
     CPUState **penv;
     int cpu_index;
+#ifdef MAPCACHE
+    pthread_mutexattr_t mxattr;
+#endif
 
     env->next_cpu = NULL;
     penv = &first_cpu;
@@ -144,6 +152,14 @@
 
     /* alloc dirty bits array */
     phys_ram_dirty = qemu_malloc(phys_ram_size >> TARGET_PAGE_BITS);
+
+#ifdef MAPCACHE
+    /* setup memory access mutex to protect mapcache */
+    pthread_mutexattr_init(&mxattr);
+    pthread_mutexattr_settype(&mxattr, PTHREAD_MUTEX_RECURSIVE);
+    pthread_mutex_init(&mapcache_mutex, &mxattr);
+    pthread_mutexattr_destroy(&mxattr);
+#endif
 }
 
 /* enable or disable low levels log */
@@ -409,16 +425,11 @@
     return 0;
 }
 
-static inline int paddr_is_ram(target_phys_addr_t addr)
-{
-    /* Is this guest physical address RAM-backed? */
-#if defined(CONFIG_DM) && (defined(__i386__) || defined(__x86_64__))
-    return ((addr < HVM_BELOW_4G_MMIO_START) ||
-            (addr >= HVM_BELOW_4G_MMIO_START + HVM_BELOW_4G_MMIO_LENGTH));
-#else
-    return (addr < ram_size);
+#if defined(__i386__) || defined(__x86_64__)
+#define phys_ram_addr(x) (qemu_map_cache(x))
+#elif defined(__ia64__)
+#define phys_ram_addr(x) (((x) < ram_size) ? (phys_ram_base + (x)) : NULL)
 #endif
-}
 
 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
                             int len, int is_write)
@@ -426,13 +437,15 @@
     int l, io_index;
     uint8_t *ptr;
     uint32_t val;
-    
+
+    mapcache_lock();
+
     while (len > 0) {
         /* How much can we copy before the next page boundary? */
         l = TARGET_PAGE_SIZE - (addr & ~TARGET_PAGE_MASK);
         if (l > len)
             l = len;
-        
+
         io_index = iomem_index(addr);
         if (is_write) {
             if (io_index) {
@@ -452,11 +465,11 @@
                     io_mem_write[io_index][0](io_mem_opaque[io_index], addr, val);
                     l = 1;
                 }
-            } else if (paddr_is_ram(addr)) {
+            } else if ((ptr = phys_ram_addr(addr)) != NULL) {
                 /* Reading from RAM */
-                memcpy(phys_ram_base + addr, buf, l);
+                memcpy(ptr, buf, l);
 #ifdef __ia64__
-                sync_icache((unsigned long)(phys_ram_base + addr), l);
+                sync_icache((unsigned long)ptr, l);
 #endif
             }
         } else {
@@ -477,9 +490,9 @@
                     stb_raw(buf, val);
                     l = 1;
                 }
-            } else if (paddr_is_ram(addr)) {
+            } else if ((ptr = phys_ram_addr(addr)) != NULL) {
                 /* Reading from RAM */
-                memcpy(buf, phys_ram_base + addr, l);
+                memcpy(buf, ptr, l);
             } else {
                 /* Neither RAM nor known MMIO space */
                 memset(buf, 0xff, len);
@@ -489,6 +502,8 @@
         buf += l;
         addr += l;
     }
+
+    mapcache_unlock();
 }
 #endif
 
Index: ioemu/vl.h
===================================================================
--- ioemu.orig/vl.h	2007-05-03 15:12:20.000000000 +0100
+++ ioemu/vl.h	2007-05-03 15:12:34.000000000 +0100
@@ -156,6 +156,28 @@
 
 extern FILE *logfile;
 
+
+#if defined(__i386__) || defined(__x86_64__)
+
+#define MAPCACHE
+
+uint8_t *qemu_map_cache(target_phys_addr_t phys_addr);
+void qemu_invalidate_map_cache(void);
+
+#include <pthread.h>
+extern pthread_mutex_t mapcache_mutex;
+#define mapcache_lock()   pthread_mutex_lock(&mapcache_mutex)
+#define mapcache_unlock() pthread_mutex_unlock(&mapcache_mutex)
+
+#else
+
+#define qemu_invalidate_map_cache() ((void)0)
+
+#define mapcache_lock()   ((void)0)
+#define mapcache_unlock() ((void)0)
+
+#endif
+
 extern int xc_handle;
 extern int domid;
 
Index: ioemu/target-i386-dm/cpu.h
===================================================================
--- ioemu.orig/target-i386-dm/cpu.h	2007-05-03 15:10:22.000000000 +0100
+++ ioemu/target-i386-dm/cpu.h	2007-05-03 15:12:21.000000000 +0100
@@ -25,7 +25,8 @@
 #ifdef TARGET_X86_64
 #define TARGET_LONG_BITS 64
 #else
-#define TARGET_LONG_BITS 32
+/* #define TARGET_LONG_BITS 32 */
+#define TARGET_LONG_BITS 64 /* for Qemu map cache */
 #endif
 
 /* target supports implicit self modifying code */
Index: ioemu/target-i386-dm/helper2.c
===================================================================
--- ioemu.orig/target-i386-dm/helper2.c	2007-05-03 15:12:19.000000000 +0100
+++ ioemu/target-i386-dm/helper2.c	2007-05-03 15:12:21.000000000 +0100
@@ -526,6 +526,9 @@
     case IOREQ_TYPE_TIMEOFFSET:
         cpu_ioreq_timeoffset(env, req);
         break;
+    case IOREQ_TYPE_INVALIDATE:
+        qemu_invalidate_map_cache();
+        break;
     default:
         hw_error("Invalid ioreq type 0x%x\n", req->type);
     }